SourceManager.cpp revision 3050ed44aea059f1bb48d05b6dd64478bd8ed0aa
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/System/Path.h"
19#include "llvm/Bitcode/Serialize.h"
20#include "llvm/Bitcode/Deserialize.h"
21#include "llvm/Support/Streams.h"
22#include <algorithm>
23using namespace clang;
24using namespace SrcMgr;
25using llvm::MemoryBuffer;
26
27//===----------------------------------------------------------------------===//
28// SourceManager Helper Classes
29//===----------------------------------------------------------------------===//
30
31ContentCache::~ContentCache() {
32  delete Buffer;
33}
34
35/// getSizeBytesMapped - Returns the number of bytes actually mapped for
36///  this ContentCache.  This can be 0 if the MemBuffer was not actually
37///  instantiated.
38unsigned ContentCache::getSizeBytesMapped() const {
39  return Buffer ? Buffer->getBufferSize() : 0;
40}
41
42/// getSize - Returns the size of the content encapsulated by this ContentCache.
43///  This can be the size of the source file or the size of an arbitrary
44///  scratch buffer.  If the ContentCache encapsulates a source file, that
45///  file is not lazily brought in from disk to satisfy this query.
46unsigned ContentCache::getSize() const {
47  return Entry ? Entry->getSize() : Buffer->getBufferSize();
48}
49
50const llvm::MemoryBuffer *ContentCache::getBuffer() const {
51  // Lazily create the Buffer for ContentCaches that wrap files.
52  if (!Buffer && Entry) {
53    // FIXME: Should we support a way to not have to do this check over
54    //   and over if we cannot open the file?
55    Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
56  }
57  return Buffer;
58}
59
60//===----------------------------------------------------------------------===//
61// Line Table Implementation
62//===----------------------------------------------------------------------===//
63
64namespace clang {
65struct LineEntry {
66  /// FileOffset - The offset in this file that the line entry occurs at.
67  unsigned FileOffset;
68
69  /// LineNo - The presumed line number of this line entry: #line 4.
70  unsigned LineNo;
71
72  /// FilenameID - The ID of the filename identified by this line entry:
73  /// #line 4 "foo.c".  This is -1 if not specified.
74  int FilenameID;
75
76  /// Flags - Set the 0 if no flags, 1 if a system header,
77  SrcMgr::CharacteristicKind FileKind;
78
79  /// IncludeOffset - This is the offset of the virtual include stack location,
80  /// which is manipulated by GNU linemarker directives.  If this is 0 then
81  /// there is no virtual #includer.
82  unsigned IncludeOffset;
83
84  static LineEntry get(unsigned Offs, unsigned Line, int Filename,
85                       SrcMgr::CharacteristicKind FileKind,
86                       unsigned IncludeOffset) {
87    LineEntry E;
88    E.FileOffset = Offs;
89    E.LineNo = Line;
90    E.FilenameID = Filename;
91    E.FileKind = FileKind;
92    E.IncludeOffset = IncludeOffset;
93    return E;
94  }
95};
96
97// needed for FindNearestLineEntry (upper_bound of LineEntry)
98inline bool operator<(const LineEntry &lhs, const LineEntry &rhs) {
99  // FIXME: should check the other field?
100  return lhs.FileOffset < rhs.FileOffset;
101}
102
103inline bool operator<(const LineEntry &E, unsigned Offset) {
104  return E.FileOffset < Offset;
105}
106
107inline bool operator<(unsigned Offset, const LineEntry &E) {
108  return Offset < E.FileOffset;
109}
110
111/// LineTableInfo - This class is used to hold and unique data used to
112/// represent #line information.
113class LineTableInfo {
114  /// FilenameIDs - This map is used to assign unique IDs to filenames in
115  /// #line directives.  This allows us to unique the filenames that
116  /// frequently reoccur and reference them with indices.  FilenameIDs holds
117  /// the mapping from string -> ID, and FilenamesByID holds the mapping of ID
118  /// to string.
119  llvm::StringMap<unsigned, llvm::BumpPtrAllocator> FilenameIDs;
120  std::vector<llvm::StringMapEntry<unsigned>*> FilenamesByID;
121
122  /// LineEntries - This is a map from FileIDs to a list of line entries (sorted
123  /// by the offset they occur in the file.
124  std::map<unsigned, std::vector<LineEntry> > LineEntries;
125public:
126  LineTableInfo() {
127  }
128
129  void clear() {
130    FilenameIDs.clear();
131    FilenamesByID.clear();
132    LineEntries.clear();
133  }
134
135  ~LineTableInfo() {}
136
137  unsigned getLineTableFilenameID(const char *Ptr, unsigned Len);
138  const char *getFilename(unsigned ID) const {
139    assert(ID < FilenamesByID.size() && "Invalid FilenameID");
140    return FilenamesByID[ID]->getKeyData();
141  }
142
143  void AddLineNote(unsigned FID, unsigned Offset,
144                   unsigned LineNo, int FilenameID);
145  void AddLineNote(unsigned FID, unsigned Offset,
146                   unsigned LineNo, int FilenameID,
147                   unsigned EntryExit, SrcMgr::CharacteristicKind FileKind);
148
149
150  /// FindNearestLineEntry - Find the line entry nearest to FID that is before
151  /// it.  If there is no line entry before Offset in FID, return null.
152  const LineEntry *FindNearestLineEntry(unsigned FID, unsigned Offset);
153};
154} // namespace clang
155
156unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
157  // Look up the filename in the string table, returning the pre-existing value
158  // if it exists.
159  llvm::StringMapEntry<unsigned> &Entry =
160    FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
161  if (Entry.getValue() != ~0U)
162    return Entry.getValue();
163
164  // Otherwise, assign this the next available ID.
165  Entry.setValue(FilenamesByID.size());
166  FilenamesByID.push_back(&Entry);
167  return FilenamesByID.size()-1;
168}
169
170/// AddLineNote - Add a line note to the line table that indicates that there
171/// is a #line at the specified FID/Offset location which changes the presumed
172/// location to LineNo/FilenameID.
173void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
174                                unsigned LineNo, int FilenameID) {
175  std::vector<LineEntry> &Entries = LineEntries[FID];
176
177  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
178         "Adding line entries out of order!");
179
180  SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
181  unsigned IncludeOffset = 0;
182
183  if (!Entries.empty()) {
184    // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
185    // that we are still in "foo.h".
186    if (FilenameID == -1)
187      FilenameID = Entries.back().FilenameID;
188
189    // If we are after a line marker that switched us to system header mode, or
190    // that set #include information, preserve it.
191    Kind = Entries.back().FileKind;
192    IncludeOffset = Entries.back().IncludeOffset;
193  }
194
195  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
196                                   IncludeOffset));
197}
198
199/// AddLineNote This is the same as the previous version of AddLineNote, but is
200/// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
201/// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
202/// this is a file exit.  FileKind specifies whether this is a system header or
203/// extern C system header.
204void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
205                                unsigned LineNo, int FilenameID,
206                                unsigned EntryExit,
207                                SrcMgr::CharacteristicKind FileKind) {
208  assert(FilenameID != -1 && "Unspecified filename should use other accessor");
209
210  std::vector<LineEntry> &Entries = LineEntries[FID];
211
212  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
213         "Adding line entries out of order!");
214
215  unsigned IncludeOffset = 0;
216  if (EntryExit == 0) {  // No #include stack change.
217    IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
218  } else if (EntryExit == 1) {
219    IncludeOffset = Offset-1;
220  } else if (EntryExit == 2) {
221    assert(!Entries.empty() && Entries.back().IncludeOffset &&
222       "PPDirectives should have caught case when popping empty include stack");
223
224    // Get the include loc of the last entries' include loc as our include loc.
225    IncludeOffset = 0;
226    if (const LineEntry *PrevEntry =
227          FindNearestLineEntry(FID, Entries.back().IncludeOffset))
228      IncludeOffset = PrevEntry->IncludeOffset;
229  }
230
231  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
232                                   IncludeOffset));
233}
234
235
236/// FindNearestLineEntry - Find the line entry nearest to FID that is before
237/// it.  If there is no line entry before Offset in FID, return null.
238const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
239                                                     unsigned Offset) {
240  const std::vector<LineEntry> &Entries = LineEntries[FID];
241  assert(!Entries.empty() && "No #line entries for this FID after all!");
242
243  // It is very common for the query to be after the last #line, check this
244  // first.
245  if (Entries.back().FileOffset <= Offset)
246    return &Entries.back();
247
248  // Do a binary search to find the maximal element that is still before Offset.
249  std::vector<LineEntry>::const_iterator I =
250    std::upper_bound(Entries.begin(), Entries.end(), Offset);
251  if (I == Entries.begin()) return 0;
252  return &*--I;
253}
254
255
256/// getLineTableFilenameID - Return the uniqued ID for the specified filename.
257///
258unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
259  if (LineTable == 0)
260    LineTable = new LineTableInfo();
261  return LineTable->getLineTableFilenameID(Ptr, Len);
262}
263
264
265/// AddLineNote - Add a line note to the line table for the FileID and offset
266/// specified by Loc.  If FilenameID is -1, it is considered to be
267/// unspecified.
268void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
269                                int FilenameID) {
270  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
271
272  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
273
274  // Remember that this file has #line directives now if it doesn't already.
275  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
276
277  if (LineTable == 0)
278    LineTable = new LineTableInfo();
279  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
280}
281
282/// AddLineNote - Add a GNU line marker to the line table.
283void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
284                                int FilenameID, bool IsFileEntry,
285                                bool IsFileExit, bool IsSystemHeader,
286                                bool IsExternCHeader) {
287  // If there is no filename and no flags, this is treated just like a #line,
288  // which does not change the flags of the previous line marker.
289  if (FilenameID == -1) {
290    assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
291           "Can't set flags without setting the filename!");
292    return AddLineNote(Loc, LineNo, FilenameID);
293  }
294
295  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
296  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
297
298  // Remember that this file has #line directives now if it doesn't already.
299  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
300
301  if (LineTable == 0)
302    LineTable = new LineTableInfo();
303
304  SrcMgr::CharacteristicKind FileKind;
305  if (IsExternCHeader)
306    FileKind = SrcMgr::C_ExternCSystem;
307  else if (IsSystemHeader)
308    FileKind = SrcMgr::C_System;
309  else
310    FileKind = SrcMgr::C_User;
311
312  unsigned EntryExit = 0;
313  if (IsFileEntry)
314    EntryExit = 1;
315  else if (IsFileExit)
316    EntryExit = 2;
317
318  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
319                         EntryExit, FileKind);
320}
321
322
323//===----------------------------------------------------------------------===//
324// Private 'Create' methods.
325//===----------------------------------------------------------------------===//
326
327SourceManager::~SourceManager() {
328  delete LineTable;
329
330  // Delete FileEntry objects corresponding to content caches.  Since the actual
331  // content cache objects are bump pointer allocated, we just have to run the
332  // dtors, but we call the deallocate method for completeness.
333  for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
334    MemBufferInfos[i]->~ContentCache();
335    ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
336  }
337  for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
338       I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
339    I->second->~ContentCache();
340    ContentCacheAlloc.Deallocate(I->second);
341  }
342}
343
344void SourceManager::clearIDTables() {
345  MainFileID = FileID();
346  SLocEntryTable.clear();
347  LastLineNoFileIDQuery = FileID();
348  LastLineNoContentCache = 0;
349  LastFileIDLookup = FileID();
350
351  if (LineTable)
352    LineTable->clear();
353
354  // Use up FileID #0 as an invalid instantiation.
355  NextOffset = 0;
356  createInstantiationLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
357}
358
359/// getOrCreateContentCache - Create or return a cached ContentCache for the
360/// specified file.
361const ContentCache *
362SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
363  assert(FileEnt && "Didn't specify a file entry to use?");
364
365  // Do we already have information about this file?
366  ContentCache *&Entry = FileInfos[FileEnt];
367  if (Entry) return Entry;
368
369  // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
370  // so that FileInfo can use the low 3 bits of the pointer for its own
371  // nefarious purposes.
372  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
373  EntryAlign = std::max(8U, EntryAlign);
374  Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
375  new (Entry) ContentCache(FileEnt);
376  return Entry;
377}
378
379
380/// createMemBufferContentCache - Create a new ContentCache for the specified
381///  memory buffer.  This does no caching.
382const ContentCache*
383SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
384  // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
385  // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
386  // the pointer for its own nefarious purposes.
387  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
388  EntryAlign = std::max(8U, EntryAlign);
389  ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
390  new (Entry) ContentCache();
391  MemBufferInfos.push_back(Entry);
392  Entry->setBuffer(Buffer);
393  return Entry;
394}
395
396//===----------------------------------------------------------------------===//
397// Methods to create new FileID's and instantiations.
398//===----------------------------------------------------------------------===//
399
400/// createFileID - Create a new fileID for the specified ContentCache and
401/// include position.  This works regardless of whether the ContentCache
402/// corresponds to a file or some other input source.
403FileID SourceManager::createFileID(const ContentCache *File,
404                                   SourceLocation IncludePos,
405                                   SrcMgr::CharacteristicKind FileCharacter) {
406  SLocEntryTable.push_back(SLocEntry::get(NextOffset,
407                                          FileInfo::get(IncludePos, File,
408                                                        FileCharacter)));
409  unsigned FileSize = File->getSize();
410  assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
411  NextOffset += FileSize+1;
412
413  // Set LastFileIDLookup to the newly created file.  The next getFileID call is
414  // almost guaranteed to be from that file.
415  return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1);
416}
417
418/// createInstantiationLoc - Return a new SourceLocation that encodes the fact
419/// that a token from SpellingLoc should actually be referenced from
420/// InstantiationLoc.
421SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
422                                                     SourceLocation ILocStart,
423                                                     SourceLocation ILocEnd,
424                                                     unsigned TokLength) {
425  InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc);
426  SLocEntryTable.push_back(SLocEntry::get(NextOffset, II));
427  assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
428  NextOffset += TokLength+1;
429  return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
430}
431
432/// getBufferData - Return a pointer to the start and end of the source buffer
433/// data for the specified FileID.
434std::pair<const char*, const char*>
435SourceManager::getBufferData(FileID FID) const {
436  const llvm::MemoryBuffer *Buf = getBuffer(FID);
437  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
438}
439
440
441//===----------------------------------------------------------------------===//
442// SourceLocation manipulation methods.
443//===----------------------------------------------------------------------===//
444
445/// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
446/// method that is used for all SourceManager queries that start with a
447/// SourceLocation object.  It is responsible for finding the entry in
448/// SLocEntryTable which contains the specified location.
449///
450FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
451  assert(SLocOffset && "Invalid FileID");
452
453  // After the first and second level caches, I see two common sorts of
454  // behavior: 1) a lot of searched FileID's are "near" the cached file location
455  // or are "near" the cached instantiation location.  2) others are just
456  // completely random and may be a very long way away.
457  //
458  // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
459  // then we fall back to a less cache efficient, but more scalable, binary
460  // search to find the location.
461
462  // See if this is near the file point - worst case we start scanning from the
463  // most newly created FileID.
464  std::vector<SrcMgr::SLocEntry>::const_iterator I;
465
466  if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
467    // Neither loc prunes our search.
468    I = SLocEntryTable.end();
469  } else {
470    // Perhaps it is near the file point.
471    I = SLocEntryTable.begin()+LastFileIDLookup.ID;
472  }
473
474  // Find the FileID that contains this.  "I" is an iterator that points to a
475  // FileID whose offset is known to be larger than SLocOffset.
476  unsigned NumProbes = 0;
477  while (1) {
478    --I;
479    if (I->getOffset() <= SLocOffset) {
480#if 0
481      printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
482             I-SLocEntryTable.begin(),
483             I->isInstantiation() ? "inst" : "file",
484             LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
485#endif
486      FileID Res = FileID::get(I-SLocEntryTable.begin());
487
488      // If this isn't an instantiation, remember it.  We have good locality
489      // across FileID lookups.
490      if (!I->isInstantiation())
491        LastFileIDLookup = Res;
492      NumLinearScans += NumProbes+1;
493      return Res;
494    }
495    if (++NumProbes == 8)
496      break;
497  }
498
499  // Convert "I" back into an index.  We know that it is an entry whose index is
500  // larger than the offset we are looking for.
501  unsigned GreaterIndex = I-SLocEntryTable.begin();
502  // LessIndex - This is the lower bound of the range that we're searching.
503  // We know that the offset corresponding to the FileID is is less than
504  // SLocOffset.
505  unsigned LessIndex = 0;
506  NumProbes = 0;
507  while (1) {
508    unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
509    unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset();
510
511    ++NumProbes;
512
513    // If the offset of the midpoint is too large, chop the high side of the
514    // range to the midpoint.
515    if (MidOffset > SLocOffset) {
516      GreaterIndex = MiddleIndex;
517      continue;
518    }
519
520    // If the middle index contains the value, succeed and return.
521    if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
522#if 0
523      printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
524             I-SLocEntryTable.begin(),
525             I->isInstantiation() ? "inst" : "file",
526             LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
527#endif
528      FileID Res = FileID::get(MiddleIndex);
529
530      // If this isn't an instantiation, remember it.  We have good locality
531      // across FileID lookups.
532      if (!I->isInstantiation())
533        LastFileIDLookup = Res;
534      NumBinaryProbes += NumProbes;
535      return Res;
536    }
537
538    // Otherwise, move the low-side up to the middle index.
539    LessIndex = MiddleIndex;
540  }
541}
542
543SourceLocation SourceManager::
544getInstantiationLocSlowCase(SourceLocation Loc) const {
545  do {
546    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
547    Loc = getSLocEntry(LocInfo.first).getInstantiation()
548                   .getInstantiationLocStart();
549    Loc = Loc.getFileLocWithOffset(LocInfo.second);
550  } while (!Loc.isFileID());
551
552  return Loc;
553}
554
555SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
556  do {
557    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
558    Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
559    Loc = Loc.getFileLocWithOffset(LocInfo.second);
560  } while (!Loc.isFileID());
561  return Loc;
562}
563
564
565std::pair<FileID, unsigned>
566SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
567                                                     unsigned Offset) const {
568  // If this is an instantiation record, walk through all the instantiation
569  // points.
570  FileID FID;
571  SourceLocation Loc;
572  do {
573    Loc = E->getInstantiation().getInstantiationLocStart();
574
575    FID = getFileID(Loc);
576    E = &getSLocEntry(FID);
577    Offset += Loc.getOffset()-E->getOffset();
578  } while (!Loc.isFileID());
579
580  return std::make_pair(FID, Offset);
581}
582
583std::pair<FileID, unsigned>
584SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
585                                                unsigned Offset) const {
586  // If this is an instantiation record, walk through all the instantiation
587  // points.
588  FileID FID;
589  SourceLocation Loc;
590  do {
591    Loc = E->getInstantiation().getSpellingLoc();
592
593    FID = getFileID(Loc);
594    E = &getSLocEntry(FID);
595    Offset += Loc.getOffset()-E->getOffset();
596  } while (!Loc.isFileID());
597
598  return std::make_pair(FID, Offset);
599}
600
601/// getImmediateSpellingLoc - Given a SourceLocation object, return the
602/// spelling location referenced by the ID.  This is the first level down
603/// towards the place where the characters that make up the lexed token can be
604/// found.  This should not generally be used by clients.
605SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{
606  if (Loc.isFileID()) return Loc;
607  std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
608  Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
609  return Loc.getFileLocWithOffset(LocInfo.second);
610}
611
612
613/// getImmediateInstantiationRange - Loc is required to be an instantiation
614/// location.  Return the start/end of the instantiation information.
615std::pair<SourceLocation,SourceLocation>
616SourceManager::getImmediateInstantiationRange(SourceLocation Loc) const {
617  assert(Loc.isMacroID() && "Not an instantiation loc!");
618  const InstantiationInfo &II = getSLocEntry(getFileID(Loc)).getInstantiation();
619  return II.getInstantiationLocRange();
620}
621
622/// getInstantiationRange - Given a SourceLocation object, return the
623/// range of tokens covered by the instantiation in the ultimate file.
624std::pair<SourceLocation,SourceLocation>
625SourceManager::getInstantiationRange(SourceLocation Loc) const {
626  if (Loc.isFileID()) return std::make_pair(Loc, Loc);
627
628  std::pair<SourceLocation,SourceLocation> Res =
629    getImmediateInstantiationRange(Loc);
630
631  // Fully resolve the start and end locations to their ultimate instantiation
632  // points.
633  while (!Res.first.isFileID())
634    Res.first = getImmediateInstantiationRange(Res.first).first;
635  while (!Res.second.isFileID())
636    Res.second = getImmediateInstantiationRange(Res.second).second;
637  return Res;
638}
639
640
641
642//===----------------------------------------------------------------------===//
643// Queries about the code at a SourceLocation.
644//===----------------------------------------------------------------------===//
645
646/// getCharacterData - Return a pointer to the start of the specified location
647/// in the appropriate MemoryBuffer.
648const char *SourceManager::getCharacterData(SourceLocation SL) const {
649  // Note that this is a hot function in the getSpelling() path, which is
650  // heavily used by -E mode.
651  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
652
653  // Note that calling 'getBuffer()' may lazily page in a source file.
654  return getSLocEntry(LocInfo.first).getFile().getContentCache()
655              ->getBuffer()->getBufferStart() + LocInfo.second;
656}
657
658
659/// getColumnNumber - Return the column # for the specified file position.
660/// this is significantly cheaper to compute than the line number.
661unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
662  const char *Buf = getBuffer(FID)->getBufferStart();
663
664  unsigned LineStart = FilePos;
665  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
666    --LineStart;
667  return FilePos-LineStart+1;
668}
669
670unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
671  if (Loc.isInvalid()) return 0;
672  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
673  return getColumnNumber(LocInfo.first, LocInfo.second);
674}
675
676unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
677  if (Loc.isInvalid()) return 0;
678  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
679  return getColumnNumber(LocInfo.first, LocInfo.second);
680}
681
682
683
684static void ComputeLineNumbers(ContentCache* FI,
685                               llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE;
686static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
687  // Note that calling 'getBuffer()' may lazily page in the file.
688  const MemoryBuffer *Buffer = FI->getBuffer();
689
690  // Find the file offsets of all of the *physical* source lines.  This does
691  // not look at trigraphs, escaped newlines, or anything else tricky.
692  std::vector<unsigned> LineOffsets;
693
694  // Line #1 starts at char 0.
695  LineOffsets.push_back(0);
696
697  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
698  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
699  unsigned Offs = 0;
700  while (1) {
701    // Skip over the contents of the line.
702    // TODO: Vectorize this?  This is very performance sensitive for programs
703    // with lots of diagnostics and in -E mode.
704    const unsigned char *NextBuf = (const unsigned char *)Buf;
705    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
706      ++NextBuf;
707    Offs += NextBuf-Buf;
708    Buf = NextBuf;
709
710    if (Buf[0] == '\n' || Buf[0] == '\r') {
711      // If this is \n\r or \r\n, skip both characters.
712      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
713        ++Offs, ++Buf;
714      ++Offs, ++Buf;
715      LineOffsets.push_back(Offs);
716    } else {
717      // Otherwise, this is a null.  If end of file, exit.
718      if (Buf == End) break;
719      // Otherwise, skip the null.
720      ++Offs, ++Buf;
721    }
722  }
723
724  // Copy the offsets into the FileInfo structure.
725  FI->NumLines = LineOffsets.size();
726  FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
727  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
728}
729
730/// getLineNumber - Given a SourceLocation, return the spelling line number
731/// for the position indicated.  This requires building and caching a table of
732/// line offsets for the MemoryBuffer, so this is not cheap: use only when
733/// about to emit a diagnostic.
734unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
735  ContentCache *Content;
736  if (LastLineNoFileIDQuery == FID)
737    Content = LastLineNoContentCache;
738  else
739    Content = const_cast<ContentCache*>(getSLocEntry(FID)
740                                        .getFile().getContentCache());
741
742  // If this is the first use of line information for this buffer, compute the
743  /// SourceLineCache for it on demand.
744  if (Content->SourceLineCache == 0)
745    ComputeLineNumbers(Content, ContentCacheAlloc);
746
747  // Okay, we know we have a line number table.  Do a binary search to find the
748  // line number that this character position lands on.
749  unsigned *SourceLineCache = Content->SourceLineCache;
750  unsigned *SourceLineCacheStart = SourceLineCache;
751  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
752
753  unsigned QueriedFilePos = FilePos+1;
754
755  // If the previous query was to the same file, we know both the file pos from
756  // that query and the line number returned.  This allows us to narrow the
757  // search space from the entire file to something near the match.
758  if (LastLineNoFileIDQuery == FID) {
759    if (QueriedFilePos >= LastLineNoFilePos) {
760      SourceLineCache = SourceLineCache+LastLineNoResult-1;
761
762      // The query is likely to be nearby the previous one.  Here we check to
763      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
764      // where big comment blocks and vertical whitespace eat up lines but
765      // contribute no tokens.
766      if (SourceLineCache+5 < SourceLineCacheEnd) {
767        if (SourceLineCache[5] > QueriedFilePos)
768          SourceLineCacheEnd = SourceLineCache+5;
769        else if (SourceLineCache+10 < SourceLineCacheEnd) {
770          if (SourceLineCache[10] > QueriedFilePos)
771            SourceLineCacheEnd = SourceLineCache+10;
772          else if (SourceLineCache+20 < SourceLineCacheEnd) {
773            if (SourceLineCache[20] > QueriedFilePos)
774              SourceLineCacheEnd = SourceLineCache+20;
775          }
776        }
777      }
778    } else {
779      SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
780    }
781  }
782
783  // If the spread is large, do a "radix" test as our initial guess, based on
784  // the assumption that lines average to approximately the same length.
785  // NOTE: This is currently disabled, as it does not appear to be profitable in
786  // initial measurements.
787  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
788    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
789
790    // Take a stab at guessing where it is.
791    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
792
793    // Check for -10 and +10 lines.
794    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
795    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
796
797    // If the computed lower bound is less than the query location, move it in.
798    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
799        SourceLineCacheStart[LowerBound] < QueriedFilePos)
800      SourceLineCache = SourceLineCacheStart+LowerBound;
801
802    // If the computed upper bound is greater than the query location, move it.
803    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
804        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
805      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
806  }
807
808  unsigned *Pos
809    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
810  unsigned LineNo = Pos-SourceLineCacheStart;
811
812  LastLineNoFileIDQuery = FID;
813  LastLineNoContentCache = Content;
814  LastLineNoFilePos = QueriedFilePos;
815  LastLineNoResult = LineNo;
816  return LineNo;
817}
818
819unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
820  if (Loc.isInvalid()) return 0;
821  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
822  return getLineNumber(LocInfo.first, LocInfo.second);
823}
824unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
825  if (Loc.isInvalid()) return 0;
826  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
827  return getLineNumber(LocInfo.first, LocInfo.second);
828}
829
830/// getFileCharacteristic - return the file characteristic of the specified
831/// source location, indicating whether this is a normal file, a system
832/// header, or an "implicit extern C" system header.
833///
834/// This state can be modified with flags on GNU linemarker directives like:
835///   # 4 "foo.h" 3
836/// which changes all source locations in the current file after that to be
837/// considered to be from a system header.
838SrcMgr::CharacteristicKind
839SourceManager::getFileCharacteristic(SourceLocation Loc) const {
840  assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
841  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
842  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
843
844  // If there are no #line directives in this file, just return the whole-file
845  // state.
846  if (!FI.hasLineDirectives())
847    return FI.getFileCharacteristic();
848
849  assert(LineTable && "Can't have linetable entries without a LineTable!");
850  // See if there is a #line directive before the location.
851  const LineEntry *Entry =
852    LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
853
854  // If this is before the first line marker, use the file characteristic.
855  if (!Entry)
856    return FI.getFileCharacteristic();
857
858  return Entry->FileKind;
859}
860
861/// Return the filename or buffer identifier of the buffer the location is in.
862/// Note that this name does not respect #line directives.  Use getPresumedLoc
863/// for normal clients.
864const char *SourceManager::getBufferName(SourceLocation Loc) const {
865  if (Loc.isInvalid()) return "<invalid loc>";
866
867  return getBuffer(getFileID(Loc))->getBufferIdentifier();
868}
869
870
871/// getPresumedLoc - This method returns the "presumed" location of a
872/// SourceLocation specifies.  A "presumed location" can be modified by #line
873/// or GNU line marker directives.  This provides a view on the data that a
874/// user should see in diagnostics, for example.
875///
876/// Note that a presumed location is always given as the instantiation point
877/// of an instantiation location, not at the spelling location.
878PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
879  if (Loc.isInvalid()) return PresumedLoc();
880
881  // Presumed locations are always for instantiation points.
882  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
883
884  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
885  const SrcMgr::ContentCache *C = FI.getContentCache();
886
887  // To get the source name, first consult the FileEntry (if one exists)
888  // before the MemBuffer as this will avoid unnecessarily paging in the
889  // MemBuffer.
890  const char *Filename =
891    C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
892  unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
893  unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
894  SourceLocation IncludeLoc = FI.getIncludeLoc();
895
896  // If we have #line directives in this file, update and overwrite the physical
897  // location info if appropriate.
898  if (FI.hasLineDirectives()) {
899    assert(LineTable && "Can't have linetable entries without a LineTable!");
900    // See if there is a #line directive before this.  If so, get it.
901    if (const LineEntry *Entry =
902          LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
903      // If the LineEntry indicates a filename, use it.
904      if (Entry->FilenameID != -1)
905        Filename = LineTable->getFilename(Entry->FilenameID);
906
907      // Use the line number specified by the LineEntry.  This line number may
908      // be multiple lines down from the line entry.  Add the difference in
909      // physical line numbers from the query point and the line marker to the
910      // total.
911      unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
912      LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
913
914      // Note that column numbers are not molested by line markers.
915
916      // Handle virtual #include manipulation.
917      if (Entry->IncludeOffset) {
918        IncludeLoc = getLocForStartOfFile(LocInfo.first);
919        IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset);
920      }
921    }
922  }
923
924  return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
925}
926
927//===----------------------------------------------------------------------===//
928// Other miscellaneous methods.
929//===----------------------------------------------------------------------===//
930
931
932/// PrintStats - Print statistics to stderr.
933///
934void SourceManager::PrintStats() const {
935  llvm::cerr << "\n*** Source Manager Stats:\n";
936  llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
937             << " mem buffers mapped.\n";
938  llvm::cerr << SLocEntryTable.size() << " SLocEntry's allocated, "
939             << NextOffset << "B of Sloc address space used.\n";
940
941  unsigned NumLineNumsComputed = 0;
942  unsigned NumFileBytesMapped = 0;
943  for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
944    NumLineNumsComputed += I->second->SourceLineCache != 0;
945    NumFileBytesMapped  += I->second->getSizeBytesMapped();
946  }
947
948  llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
949             << NumLineNumsComputed << " files with line #'s computed.\n";
950  llvm::cerr << "FileID scans: " << NumLinearScans << " linear, "
951             << NumBinaryProbes << " binary.\n";
952}
953
954//===----------------------------------------------------------------------===//
955// Serialization.
956//===----------------------------------------------------------------------===//
957
958void ContentCache::Emit(llvm::Serializer& S) const {
959  S.FlushRecord();
960  S.EmitPtr(this);
961
962  if (Entry) {
963    llvm::sys::Path Fname(Buffer->getBufferIdentifier());
964
965    if (Fname.isAbsolute())
966      S.EmitCStr(Fname.c_str());
967    else {
968      // Create an absolute path.
969      // FIXME: This will potentially contain ".." and "." in the path.
970      llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
971      path.appendComponent(Fname.c_str());
972      S.EmitCStr(path.c_str());
973    }
974  }
975  else {
976    const char* p = Buffer->getBufferStart();
977    const char* e = Buffer->getBufferEnd();
978
979    S.EmitInt(e-p);
980
981    for ( ; p != e; ++p)
982      S.EmitInt(*p);
983  }
984
985  S.FlushRecord();
986}
987
988void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
989                                       SourceManager& SMgr,
990                                       FileManager* FMgr,
991                                       std::vector<char>& Buf) {
992  if (FMgr) {
993    llvm::SerializedPtrID PtrID = D.ReadPtrID();
994    D.ReadCStr(Buf,false);
995
996    // Create/fetch the FileEntry.
997    const char* start = &Buf[0];
998    const FileEntry* E = FMgr->getFile(start,start+Buf.size());
999
1000    // FIXME: Ideally we want a lazy materialization of the ContentCache
1001    //  anyway, because we don't want to read in source files unless this
1002    //  is absolutely needed.
1003    if (!E)
1004      D.RegisterPtr(PtrID,NULL);
1005    else
1006      // Get the ContextCache object and register it with the deserializer.
1007      D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E));
1008    return;
1009  }
1010
1011  // Register the ContextCache object with the deserializer.
1012  /* FIXME:
1013  ContentCache *Entry
1014  SMgr.MemBufferInfos.push_back(ContentCache());
1015   = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
1016  D.RegisterPtr(&Entry);
1017
1018  // Create the buffer.
1019  unsigned Size = D.ReadInt();
1020  Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
1021
1022  // Read the contents of the buffer.
1023  char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
1024  for (unsigned i = 0; i < Size ; ++i)
1025    p[i] = D.ReadInt();
1026   */
1027}
1028
1029void SourceManager::Emit(llvm::Serializer& S) const {
1030  S.EnterBlock();
1031  S.EmitPtr(this);
1032  S.EmitInt(MainFileID.getOpaqueValue());
1033
1034  // Emit: FileInfos.  Just emit the file name.
1035  S.EnterBlock();
1036
1037  // FIXME: Emit FileInfos.
1038  //std::for_each(FileInfos.begin(), FileInfos.end(),
1039  //              S.MakeEmitter<ContentCache>());
1040
1041  S.ExitBlock();
1042
1043  // Emit: MemBufferInfos
1044  S.EnterBlock();
1045
1046  /* FIXME: EMIT.
1047  std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
1048                S.MakeEmitter<ContentCache>());
1049   */
1050
1051  S.ExitBlock();
1052
1053  // FIXME: Emit SLocEntryTable.
1054
1055  S.ExitBlock();
1056}
1057
1058SourceManager*
1059SourceManager::CreateAndRegister(llvm::Deserializer &D, FileManager &FMgr) {
1060  SourceManager *M = new SourceManager();
1061  D.RegisterPtr(M);
1062
1063  // Read: the FileID of the main source file of the translation unit.
1064  M->MainFileID = FileID::get(D.ReadInt());
1065
1066  std::vector<char> Buf;
1067
1068  /*{ // FIXME Read: FileInfos.
1069    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
1070    while (!D.FinishedBlock(BLoc))
1071    ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
1072  }*/
1073
1074  /*{ // FIXME Read: MemBufferInfos.
1075    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
1076    while (!D.FinishedBlock(BLoc))
1077    ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
1078    }*/
1079
1080  // FIXME: Read SLocEntryTable.
1081
1082  return M;
1083}
1084