SourceManager.cpp revision 137b6a6149c53dbbcb8fba98e524d9ad0f3c8736
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/System/Path.h"
19#include "llvm/Bitcode/Serialize.h"
20#include "llvm/Bitcode/Deserialize.h"
21#include "llvm/Support/Streams.h"
22#include <algorithm>
23using namespace clang;
24using namespace SrcMgr;
25using llvm::MemoryBuffer;
26
27//===----------------------------------------------------------------------===//
28// SourceManager Helper Classes
29//===----------------------------------------------------------------------===//
30
31ContentCache::~ContentCache() {
32  delete Buffer;
33}
34
35/// getSizeBytesMapped - Returns the number of bytes actually mapped for
36///  this ContentCache.  This can be 0 if the MemBuffer was not actually
37///  instantiated.
38unsigned ContentCache::getSizeBytesMapped() const {
39  return Buffer ? Buffer->getBufferSize() : 0;
40}
41
42/// getSize - Returns the size of the content encapsulated by this ContentCache.
43///  This can be the size of the source file or the size of an arbitrary
44///  scratch buffer.  If the ContentCache encapsulates a source file, that
45///  file is not lazily brought in from disk to satisfy this query.
46unsigned ContentCache::getSize() const {
47  return Entry ? Entry->getSize() : Buffer->getBufferSize();
48}
49
50const llvm::MemoryBuffer *ContentCache::getBuffer() const {
51  // Lazily create the Buffer for ContentCaches that wrap files.
52  if (!Buffer && Entry) {
53    // FIXME: Should we support a way to not have to do this check over
54    //   and over if we cannot open the file?
55    Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
56  }
57  return Buffer;
58}
59
60//===----------------------------------------------------------------------===//
61// Line Table Implementation
62//===----------------------------------------------------------------------===//
63
64namespace clang {
65struct LineEntry {
66  /// FileOffset - The offset in this file that the line entry occurs at.
67  unsigned FileOffset;
68
69  /// LineNo - The presumed line number of this line entry: #line 4.
70  unsigned LineNo;
71
72  /// FilenameID - The ID of the filename identified by this line entry:
73  /// #line 4 "foo.c".  This is -1 if not specified.
74  int FilenameID;
75
76  /// Flags - Set the 0 if no flags, 1 if a system header,
77  SrcMgr::CharacteristicKind FileKind;
78
79  /// IncludeOffset - This is the offset of the virtual include stack location,
80  /// which is manipulated by GNU linemarker directives.  If this is 0 then
81  /// there is no virtual #includer.
82  unsigned IncludeOffset;
83
84  static LineEntry get(unsigned Offs, unsigned Line, int Filename,
85                       SrcMgr::CharacteristicKind FileKind,
86                       unsigned IncludeOffset) {
87    LineEntry E;
88    E.FileOffset = Offs;
89    E.LineNo = Line;
90    E.FilenameID = Filename;
91    E.FileKind = FileKind;
92    E.IncludeOffset = IncludeOffset;
93    return E;
94  }
95};
96
97inline bool operator<(const LineEntry &E, unsigned Offset) {
98  return E.FileOffset < Offset;
99}
100
101inline bool operator<(unsigned Offset, const LineEntry &E) {
102  return Offset < E.FileOffset;
103}
104
105/// LineTableInfo - This class is used to hold and unique data used to
106/// represent #line information.
107class LineTableInfo {
108  /// FilenameIDs - This map is used to assign unique IDs to filenames in
109  /// #line directives.  This allows us to unique the filenames that
110  /// frequently reoccur and reference them with indices.  FilenameIDs holds
111  /// the mapping from string -> ID, and FilenamesByID holds the mapping of ID
112  /// to string.
113  llvm::StringMap<unsigned, llvm::BumpPtrAllocator> FilenameIDs;
114  std::vector<llvm::StringMapEntry<unsigned>*> FilenamesByID;
115
116  /// LineEntries - This is a map from FileIDs to a list of line entries (sorted
117  /// by the offset they occur in the file.
118  std::map<unsigned, std::vector<LineEntry> > LineEntries;
119public:
120  LineTableInfo() {
121  }
122
123  void clear() {
124    FilenameIDs.clear();
125    FilenamesByID.clear();
126  }
127
128  ~LineTableInfo() {}
129
130  unsigned getLineTableFilenameID(const char *Ptr, unsigned Len);
131  const char *getFilename(unsigned ID) const {
132    assert(ID < FilenamesByID.size() && "Invalid FilenameID");
133    return FilenamesByID[ID]->getKeyData();
134  }
135
136  void AddLineNote(unsigned FID, unsigned Offset,
137                   unsigned LineNo, int FilenameID);
138  void AddLineNote(unsigned FID, unsigned Offset,
139                   unsigned LineNo, int FilenameID,
140                   unsigned EntryExit, SrcMgr::CharacteristicKind FileKind);
141
142
143  /// FindNearestLineEntry - Find the line entry nearest to FID that is before
144  /// it.  If there is no line entry before Offset in FID, return null.
145  const LineEntry *FindNearestLineEntry(unsigned FID, unsigned Offset);
146};
147} // namespace clang
148
149unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
150  // Look up the filename in the string table, returning the pre-existing value
151  // if it exists.
152  llvm::StringMapEntry<unsigned> &Entry =
153    FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
154  if (Entry.getValue() != ~0U)
155    return Entry.getValue();
156
157  // Otherwise, assign this the next available ID.
158  Entry.setValue(FilenamesByID.size());
159  FilenamesByID.push_back(&Entry);
160  return FilenamesByID.size()-1;
161}
162
163/// AddLineNote - Add a line note to the line table that indicates that there
164/// is a #line at the specified FID/Offset location which changes the presumed
165/// location to LineNo/FilenameID.
166void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
167                                unsigned LineNo, int FilenameID) {
168  std::vector<LineEntry> &Entries = LineEntries[FID];
169
170  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
171         "Adding line entries out of order!");
172
173  SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
174  unsigned IncludeOffset = 0;
175
176  if (!Entries.empty()) {
177    // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
178    // that we are still in "foo.h".
179    if (FilenameID == -1)
180      FilenameID = Entries.back().FilenameID;
181
182    // If we are after a line marker that switched us to system header mode, or
183    // that set #include information, preserve it.
184    Kind = Entries.back().FileKind;
185    IncludeOffset = Entries.back().IncludeOffset;
186  }
187
188  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
189                                   IncludeOffset));
190}
191
192/// AddLineNote This is the same as the previous version of AddLineNote, but is
193/// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
194/// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
195/// this is a file exit.  FileKind specifies whether this is a system header or
196/// extern C system header.
197void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
198                                unsigned LineNo, int FilenameID,
199                                unsigned EntryExit,
200                                SrcMgr::CharacteristicKind FileKind) {
201  assert(FilenameID != -1 && "Unspecified filename should use other accessor");
202
203  std::vector<LineEntry> &Entries = LineEntries[FID];
204
205  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
206         "Adding line entries out of order!");
207
208  unsigned IncludeOffset = 0;
209  if (EntryExit == 0) {  // No #include stack change.
210    IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
211  } else if (EntryExit == 1) {
212    IncludeOffset = Offset-1;
213  } else if (EntryExit == 2) {
214    assert(!Entries.empty() && Entries.back().IncludeOffset &&
215       "PPDirectives should have caught case when popping empty include stack");
216
217    // Get the include loc of the last entries' include loc as our include loc.
218    IncludeOffset = 0;
219    if (const LineEntry *PrevEntry =
220          FindNearestLineEntry(FID, Entries.back().IncludeOffset))
221      IncludeOffset = PrevEntry->IncludeOffset;
222  }
223
224  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
225                                   IncludeOffset));
226}
227
228
229/// FindNearestLineEntry - Find the line entry nearest to FID that is before
230/// it.  If there is no line entry before Offset in FID, return null.
231const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
232                                                     unsigned Offset) {
233  const std::vector<LineEntry> &Entries = LineEntries[FID];
234  assert(!Entries.empty() && "No #line entries for this FID after all!");
235
236  // It is very common for the query to be after the last #line, check this
237  // first.
238  if (Entries.back().FileOffset <= Offset)
239    return &Entries.back();
240
241  // Do a binary search to find the maximal element that is still before Offset.
242  std::vector<LineEntry>::const_iterator I =
243    std::upper_bound(Entries.begin(), Entries.end(), Offset);
244  if (I == Entries.begin()) return 0;
245  return &*--I;
246}
247
248
249/// getLineTableFilenameID - Return the uniqued ID for the specified filename.
250///
251unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
252  if (LineTable == 0)
253    LineTable = new LineTableInfo();
254  return LineTable->getLineTableFilenameID(Ptr, Len);
255}
256
257
258/// AddLineNote - Add a line note to the line table for the FileID and offset
259/// specified by Loc.  If FilenameID is -1, it is considered to be
260/// unspecified.
261void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
262                                int FilenameID) {
263  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
264
265  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
266
267  // Remember that this file has #line directives now if it doesn't already.
268  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
269
270  if (LineTable == 0)
271    LineTable = new LineTableInfo();
272  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
273}
274
275/// AddLineNote - Add a GNU line marker to the line table.
276void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
277                                int FilenameID, bool IsFileEntry,
278                                bool IsFileExit, bool IsSystemHeader,
279                                bool IsExternCHeader) {
280  // If there is no filename and no flags, this is treated just like a #line,
281  // which does not change the flags of the previous line marker.
282  if (FilenameID == -1) {
283    assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
284           "Can't set flags without setting the filename!");
285    return AddLineNote(Loc, LineNo, FilenameID);
286  }
287
288  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
289  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
290
291  // Remember that this file has #line directives now if it doesn't already.
292  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
293
294  if (LineTable == 0)
295    LineTable = new LineTableInfo();
296
297  SrcMgr::CharacteristicKind FileKind;
298  if (IsExternCHeader)
299    FileKind = SrcMgr::C_ExternCSystem;
300  else if (IsSystemHeader)
301    FileKind = SrcMgr::C_System;
302  else
303    FileKind = SrcMgr::C_User;
304
305  unsigned EntryExit = 0;
306  if (IsFileEntry)
307    EntryExit = 1;
308  else if (IsFileExit)
309    EntryExit = 2;
310
311  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
312                         EntryExit, FileKind);
313}
314
315
316//===----------------------------------------------------------------------===//
317// Private 'Create' methods.
318//===----------------------------------------------------------------------===//
319
320SourceManager::~SourceManager() {
321  delete LineTable;
322
323  // Delete FileEntry objects corresponding to content caches.  Since the actual
324  // content cache objects are bump pointer allocated, we just have to run the
325  // dtors, but we call the deallocate method for completeness.
326  for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
327    MemBufferInfos[i]->~ContentCache();
328    ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
329  }
330  for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
331       I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
332    I->second->~ContentCache();
333    ContentCacheAlloc.Deallocate(I->second);
334  }
335}
336
337void SourceManager::clearIDTables() {
338  MainFileID = FileID();
339  SLocEntryTable.clear();
340  LastLineNoFileIDQuery = FileID();
341  LastLineNoContentCache = 0;
342  LastFileIDLookup = FileID();
343
344  if (LineTable)
345    LineTable->clear();
346
347  // Use up FileID #0 as an invalid instantiation.
348  NextOffset = 0;
349  createInstantiationLoc(SourceLocation(), SourceLocation(), 1);
350}
351
352/// getOrCreateContentCache - Create or return a cached ContentCache for the
353/// specified file.
354const ContentCache *
355SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
356  assert(FileEnt && "Didn't specify a file entry to use?");
357
358  // Do we already have information about this file?
359  ContentCache *&Entry = FileInfos[FileEnt];
360  if (Entry) return Entry;
361
362  // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
363  // so that FileInfo can use the low 3 bits of the pointer for its own
364  // nefarious purposes.
365  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
366  EntryAlign = std::max(8U, EntryAlign);
367  Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
368  new (Entry) ContentCache(FileEnt);
369  return Entry;
370}
371
372
373/// createMemBufferContentCache - Create a new ContentCache for the specified
374///  memory buffer.  This does no caching.
375const ContentCache*
376SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
377  // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
378  // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
379  // the pointer for its own nefarious purposes.
380  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
381  EntryAlign = std::max(8U, EntryAlign);
382  ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
383  new (Entry) ContentCache();
384  MemBufferInfos.push_back(Entry);
385  Entry->setBuffer(Buffer);
386  return Entry;
387}
388
389//===----------------------------------------------------------------------===//
390// Methods to create new FileID's and instantiations.
391//===----------------------------------------------------------------------===//
392
393/// createFileID - Create a new fileID for the specified ContentCache and
394/// include position.  This works regardless of whether the ContentCache
395/// corresponds to a file or some other input source.
396FileID SourceManager::createFileID(const ContentCache *File,
397                                   SourceLocation IncludePos,
398                                   SrcMgr::CharacteristicKind FileCharacter) {
399  SLocEntryTable.push_back(SLocEntry::get(NextOffset,
400                                          FileInfo::get(IncludePos, File,
401                                                        FileCharacter)));
402  unsigned FileSize = File->getSize();
403  assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
404  NextOffset += FileSize+1;
405
406  // Set LastFileIDLookup to the newly created file.  The next getFileID call is
407  // almost guaranteed to be from that file.
408  return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1);
409}
410
411/// createInstantiationLoc - Return a new SourceLocation that encodes the fact
412/// that a token from SpellingLoc should actually be referenced from
413/// InstantiationLoc.
414SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
415                                                     SourceLocation InstantLoc,
416                                                     unsigned TokLength) {
417  SLocEntryTable.push_back(SLocEntry::get(NextOffset,
418                                          InstantiationInfo::get(InstantLoc,
419                                                                 SpellingLoc)));
420  assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
421  NextOffset += TokLength+1;
422  return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
423}
424
425/// getBufferData - Return a pointer to the start and end of the source buffer
426/// data for the specified FileID.
427std::pair<const char*, const char*>
428SourceManager::getBufferData(FileID FID) const {
429  const llvm::MemoryBuffer *Buf = getBuffer(FID);
430  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
431}
432
433
434//===----------------------------------------------------------------------===//
435// SourceLocation manipulation methods.
436//===----------------------------------------------------------------------===//
437
438/// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
439/// method that is used for all SourceManager queries that start with a
440/// SourceLocation object.  It is responsible for finding the entry in
441/// SLocEntryTable which contains the specified location.
442///
443FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
444  assert(SLocOffset && "Invalid FileID");
445
446  // After the first and second level caches, I see two common sorts of
447  // behavior: 1) a lot of searched FileID's are "near" the cached file location
448  // or are "near" the cached instantiation location.  2) others are just
449  // completely random and may be a very long way away.
450  //
451  // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
452  // then we fall back to a less cache efficient, but more scalable, binary
453  // search to find the location.
454
455  // See if this is near the file point - worst case we start scanning from the
456  // most newly created FileID.
457  std::vector<SrcMgr::SLocEntry>::const_iterator I;
458
459  if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
460    // Neither loc prunes our search.
461    I = SLocEntryTable.end();
462  } else {
463    // Perhaps it is near the file point.
464    I = SLocEntryTable.begin()+LastFileIDLookup.ID;
465  }
466
467  // Find the FileID that contains this.  "I" is an iterator that points to a
468  // FileID whose offset is known to be larger than SLocOffset.
469  unsigned NumProbes = 0;
470  while (1) {
471    --I;
472    if (I->getOffset() <= SLocOffset) {
473#if 0
474      printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
475             I-SLocEntryTable.begin(),
476             I->isInstantiation() ? "inst" : "file",
477             LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
478#endif
479      FileID Res = FileID::get(I-SLocEntryTable.begin());
480
481      // If this isn't an instantiation, remember it.  We have good locality
482      // across FileID lookups.
483      if (!I->isInstantiation())
484        LastFileIDLookup = Res;
485      NumLinearScans += NumProbes+1;
486      return Res;
487    }
488    if (++NumProbes == 8)
489      break;
490  }
491
492  // Convert "I" back into an index.  We know that it is an entry whose index is
493  // larger than the offset we are looking for.
494  unsigned GreaterIndex = I-SLocEntryTable.begin();
495  // LessIndex - This is the lower bound of the range that we're searching.
496  // We know that the offset corresponding to the FileID is is less than
497  // SLocOffset.
498  unsigned LessIndex = 0;
499  NumProbes = 0;
500  while (1) {
501    unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
502    unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset();
503
504    ++NumProbes;
505
506    // If the offset of the midpoint is too large, chop the high side of the
507    // range to the midpoint.
508    if (MidOffset > SLocOffset) {
509      GreaterIndex = MiddleIndex;
510      continue;
511    }
512
513    // If the middle index contains the value, succeed and return.
514    if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
515#if 0
516      printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
517             I-SLocEntryTable.begin(),
518             I->isInstantiation() ? "inst" : "file",
519             LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
520#endif
521      FileID Res = FileID::get(MiddleIndex);
522
523      // If this isn't an instantiation, remember it.  We have good locality
524      // across FileID lookups.
525      if (!I->isInstantiation())
526        LastFileIDLookup = Res;
527      NumBinaryProbes += NumProbes;
528      return Res;
529    }
530
531    // Otherwise, move the low-side up to the middle index.
532    LessIndex = MiddleIndex;
533  }
534}
535
536SourceLocation SourceManager::
537getInstantiationLocSlowCase(SourceLocation Loc) const {
538  do {
539    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
540    Loc =getSLocEntry(LocInfo.first).getInstantiation().getInstantiationLoc();
541    Loc = Loc.getFileLocWithOffset(LocInfo.second);
542  } while (!Loc.isFileID());
543
544  return Loc;
545}
546
547SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
548  do {
549    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
550    Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
551    Loc = Loc.getFileLocWithOffset(LocInfo.second);
552  } while (!Loc.isFileID());
553  return Loc;
554}
555
556
557std::pair<FileID, unsigned>
558SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
559                                                     unsigned Offset) const {
560  // If this is an instantiation record, walk through all the instantiation
561  // points.
562  FileID FID;
563  SourceLocation Loc;
564  do {
565    Loc = E->getInstantiation().getInstantiationLoc();
566
567    FID = getFileID(Loc);
568    E = &getSLocEntry(FID);
569    Offset += Loc.getOffset()-E->getOffset();
570  } while (!Loc.isFileID());
571
572  return std::make_pair(FID, Offset);
573}
574
575std::pair<FileID, unsigned>
576SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
577                                                unsigned Offset) const {
578  // If this is an instantiation record, walk through all the instantiation
579  // points.
580  FileID FID;
581  SourceLocation Loc;
582  do {
583    Loc = E->getInstantiation().getSpellingLoc();
584
585    FID = getFileID(Loc);
586    E = &getSLocEntry(FID);
587    Offset += Loc.getOffset()-E->getOffset();
588  } while (!Loc.isFileID());
589
590  return std::make_pair(FID, Offset);
591}
592
593
594//===----------------------------------------------------------------------===//
595// Queries about the code at a SourceLocation.
596//===----------------------------------------------------------------------===//
597
598/// getCharacterData - Return a pointer to the start of the specified location
599/// in the appropriate MemoryBuffer.
600const char *SourceManager::getCharacterData(SourceLocation SL) const {
601  // Note that this is a hot function in the getSpelling() path, which is
602  // heavily used by -E mode.
603  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
604
605  // Note that calling 'getBuffer()' may lazily page in a source file.
606  return getSLocEntry(LocInfo.first).getFile().getContentCache()
607              ->getBuffer()->getBufferStart() + LocInfo.second;
608}
609
610
611/// getColumnNumber - Return the column # for the specified file position.
612/// this is significantly cheaper to compute than the line number.
613unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
614  const char *Buf = getBuffer(FID)->getBufferStart();
615
616  unsigned LineStart = FilePos;
617  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
618    --LineStart;
619  return FilePos-LineStart+1;
620}
621
622unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
623  if (Loc.isInvalid()) return 0;
624  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
625  return getColumnNumber(LocInfo.first, LocInfo.second);
626}
627
628unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
629  if (Loc.isInvalid()) return 0;
630  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
631  return getColumnNumber(LocInfo.first, LocInfo.second);
632}
633
634
635
636static void ComputeLineNumbers(ContentCache* FI,
637                               llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE;
638static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
639  // Note that calling 'getBuffer()' may lazily page in the file.
640  const MemoryBuffer *Buffer = FI->getBuffer();
641
642  // Find the file offsets of all of the *physical* source lines.  This does
643  // not look at trigraphs, escaped newlines, or anything else tricky.
644  std::vector<unsigned> LineOffsets;
645
646  // Line #1 starts at char 0.
647  LineOffsets.push_back(0);
648
649  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
650  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
651  unsigned Offs = 0;
652  while (1) {
653    // Skip over the contents of the line.
654    // TODO: Vectorize this?  This is very performance sensitive for programs
655    // with lots of diagnostics and in -E mode.
656    const unsigned char *NextBuf = (const unsigned char *)Buf;
657    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
658      ++NextBuf;
659    Offs += NextBuf-Buf;
660    Buf = NextBuf;
661
662    if (Buf[0] == '\n' || Buf[0] == '\r') {
663      // If this is \n\r or \r\n, skip both characters.
664      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
665        ++Offs, ++Buf;
666      ++Offs, ++Buf;
667      LineOffsets.push_back(Offs);
668    } else {
669      // Otherwise, this is a null.  If end of file, exit.
670      if (Buf == End) break;
671      // Otherwise, skip the null.
672      ++Offs, ++Buf;
673    }
674  }
675
676  // Copy the offsets into the FileInfo structure.
677  FI->NumLines = LineOffsets.size();
678  FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
679  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
680}
681
682/// getLineNumber - Given a SourceLocation, return the spelling line number
683/// for the position indicated.  This requires building and caching a table of
684/// line offsets for the MemoryBuffer, so this is not cheap: use only when
685/// about to emit a diagnostic.
686unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
687  ContentCache *Content;
688  if (LastLineNoFileIDQuery == FID)
689    Content = LastLineNoContentCache;
690  else
691    Content = const_cast<ContentCache*>(getSLocEntry(FID)
692                                        .getFile().getContentCache());
693
694  // If this is the first use of line information for this buffer, compute the
695  /// SourceLineCache for it on demand.
696  if (Content->SourceLineCache == 0)
697    ComputeLineNumbers(Content, ContentCacheAlloc);
698
699  // Okay, we know we have a line number table.  Do a binary search to find the
700  // line number that this character position lands on.
701  unsigned *SourceLineCache = Content->SourceLineCache;
702  unsigned *SourceLineCacheStart = SourceLineCache;
703  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
704
705  unsigned QueriedFilePos = FilePos+1;
706
707  // If the previous query was to the same file, we know both the file pos from
708  // that query and the line number returned.  This allows us to narrow the
709  // search space from the entire file to something near the match.
710  if (LastLineNoFileIDQuery == FID) {
711    if (QueriedFilePos >= LastLineNoFilePos) {
712      SourceLineCache = SourceLineCache+LastLineNoResult-1;
713
714      // The query is likely to be nearby the previous one.  Here we check to
715      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
716      // where big comment blocks and vertical whitespace eat up lines but
717      // contribute no tokens.
718      if (SourceLineCache+5 < SourceLineCacheEnd) {
719        if (SourceLineCache[5] > QueriedFilePos)
720          SourceLineCacheEnd = SourceLineCache+5;
721        else if (SourceLineCache+10 < SourceLineCacheEnd) {
722          if (SourceLineCache[10] > QueriedFilePos)
723            SourceLineCacheEnd = SourceLineCache+10;
724          else if (SourceLineCache+20 < SourceLineCacheEnd) {
725            if (SourceLineCache[20] > QueriedFilePos)
726              SourceLineCacheEnd = SourceLineCache+20;
727          }
728        }
729      }
730    } else {
731      SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
732    }
733  }
734
735  // If the spread is large, do a "radix" test as our initial guess, based on
736  // the assumption that lines average to approximately the same length.
737  // NOTE: This is currently disabled, as it does not appear to be profitable in
738  // initial measurements.
739  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
740    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
741
742    // Take a stab at guessing where it is.
743    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
744
745    // Check for -10 and +10 lines.
746    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
747    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
748
749    // If the computed lower bound is less than the query location, move it in.
750    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
751        SourceLineCacheStart[LowerBound] < QueriedFilePos)
752      SourceLineCache = SourceLineCacheStart+LowerBound;
753
754    // If the computed upper bound is greater than the query location, move it.
755    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
756        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
757      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
758  }
759
760  unsigned *Pos
761    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
762  unsigned LineNo = Pos-SourceLineCacheStart;
763
764  LastLineNoFileIDQuery = FID;
765  LastLineNoContentCache = Content;
766  LastLineNoFilePos = QueriedFilePos;
767  LastLineNoResult = LineNo;
768  return LineNo;
769}
770
771unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
772  if (Loc.isInvalid()) return 0;
773  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
774  return getLineNumber(LocInfo.first, LocInfo.second);
775}
776unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
777  if (Loc.isInvalid()) return 0;
778  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
779  return getLineNumber(LocInfo.first, LocInfo.second);
780}
781
782/// getFileCharacteristic - return the file characteristic of the specified
783/// source location, indicating whether this is a normal file, a system
784/// header, or an "implicit extern C" system header.
785///
786/// This state can be modified with flags on GNU linemarker directives like:
787///   # 4 "foo.h" 3
788/// which changes all source locations in the current file after that to be
789/// considered to be from a system header.
790SrcMgr::CharacteristicKind
791SourceManager::getFileCharacteristic(SourceLocation Loc) const {
792  assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
793  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
794  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
795
796  // If there are no #line directives in this file, just return the whole-file
797  // state.
798  if (!FI.hasLineDirectives())
799    return FI.getFileCharacteristic();
800
801  assert(LineTable && "Can't have linetable entries without a LineTable!");
802  // See if there is a #line directive before the location.
803  const LineEntry *Entry =
804    LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
805
806  // If this is before the first line marker, use the file characteristic.
807  if (!Entry)
808    return FI.getFileCharacteristic();
809
810  return Entry->FileKind;
811}
812
813
814/// getPresumedLoc - This method returns the "presumed" location of a
815/// SourceLocation specifies.  A "presumed location" can be modified by #line
816/// or GNU line marker directives.  This provides a view on the data that a
817/// user should see in diagnostics, for example.
818///
819/// Note that a presumed location is always given as the instantiation point
820/// of an instantiation location, not at the spelling location.
821PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
822  if (Loc.isInvalid()) return PresumedLoc();
823
824  // Presumed locations are always for instantiation points.
825  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
826
827  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
828  const SrcMgr::ContentCache *C = FI.getContentCache();
829
830  // To get the source name, first consult the FileEntry (if one exists)
831  // before the MemBuffer as this will avoid unnecessarily paging in the
832  // MemBuffer.
833  const char *Filename =
834    C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
835  unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
836  unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
837  SourceLocation IncludeLoc = FI.getIncludeLoc();
838
839  // If we have #line directives in this file, update and overwrite the physical
840  // location info if appropriate.
841  if (FI.hasLineDirectives()) {
842    assert(LineTable && "Can't have linetable entries without a LineTable!");
843    // See if there is a #line directive before this.  If so, get it.
844    if (const LineEntry *Entry =
845          LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
846      // If the LineEntry indicates a filename, use it.
847      if (Entry->FilenameID != -1)
848        Filename = LineTable->getFilename(Entry->FilenameID);
849
850      // Use the line number specified by the LineEntry.  This line number may
851      // be multiple lines down from the line entry.  Add the difference in
852      // physical line numbers from the query point and the line marker to the
853      // total.
854      unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
855      LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
856
857      // Note that column numbers are not molested by line markers.
858
859      // Handle virtual #include manipulation.
860      if (Entry->IncludeOffset) {
861        IncludeLoc = getLocForStartOfFile(LocInfo.first);
862        IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset);
863      }
864    }
865  }
866
867  return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
868}
869
870//===----------------------------------------------------------------------===//
871// Other miscellaneous methods.
872//===----------------------------------------------------------------------===//
873
874
875/// PrintStats - Print statistics to stderr.
876///
877void SourceManager::PrintStats() const {
878  llvm::cerr << "\n*** Source Manager Stats:\n";
879  llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
880             << " mem buffers mapped.\n";
881  llvm::cerr << SLocEntryTable.size() << " SLocEntry's allocated, "
882             << NextOffset << "B of Sloc address space used.\n";
883
884  unsigned NumLineNumsComputed = 0;
885  unsigned NumFileBytesMapped = 0;
886  for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
887    NumLineNumsComputed += I->second->SourceLineCache != 0;
888    NumFileBytesMapped  += I->second->getSizeBytesMapped();
889  }
890
891  llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
892             << NumLineNumsComputed << " files with line #'s computed.\n";
893  llvm::cerr << "FileID scans: " << NumLinearScans << " linear, "
894             << NumBinaryProbes << " binary.\n";
895}
896
897//===----------------------------------------------------------------------===//
898// Serialization.
899//===----------------------------------------------------------------------===//
900
901void ContentCache::Emit(llvm::Serializer& S) const {
902  S.FlushRecord();
903  S.EmitPtr(this);
904
905  if (Entry) {
906    llvm::sys::Path Fname(Buffer->getBufferIdentifier());
907
908    if (Fname.isAbsolute())
909      S.EmitCStr(Fname.c_str());
910    else {
911      // Create an absolute path.
912      // FIXME: This will potentially contain ".." and "." in the path.
913      llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
914      path.appendComponent(Fname.c_str());
915      S.EmitCStr(path.c_str());
916    }
917  }
918  else {
919    const char* p = Buffer->getBufferStart();
920    const char* e = Buffer->getBufferEnd();
921
922    S.EmitInt(e-p);
923
924    for ( ; p != e; ++p)
925      S.EmitInt(*p);
926  }
927
928  S.FlushRecord();
929}
930
931void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
932                                       SourceManager& SMgr,
933                                       FileManager* FMgr,
934                                       std::vector<char>& Buf) {
935  if (FMgr) {
936    llvm::SerializedPtrID PtrID = D.ReadPtrID();
937    D.ReadCStr(Buf,false);
938
939    // Create/fetch the FileEntry.
940    const char* start = &Buf[0];
941    const FileEntry* E = FMgr->getFile(start,start+Buf.size());
942
943    // FIXME: Ideally we want a lazy materialization of the ContentCache
944    //  anyway, because we don't want to read in source files unless this
945    //  is absolutely needed.
946    if (!E)
947      D.RegisterPtr(PtrID,NULL);
948    else
949      // Get the ContextCache object and register it with the deserializer.
950      D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E));
951    return;
952  }
953
954  // Register the ContextCache object with the deserializer.
955  /* FIXME:
956  ContentCache *Entry
957  SMgr.MemBufferInfos.push_back(ContentCache());
958   = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
959  D.RegisterPtr(&Entry);
960
961  // Create the buffer.
962  unsigned Size = D.ReadInt();
963  Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
964
965  // Read the contents of the buffer.
966  char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
967  for (unsigned i = 0; i < Size ; ++i)
968    p[i] = D.ReadInt();
969   */
970}
971
972void SourceManager::Emit(llvm::Serializer& S) const {
973  S.EnterBlock();
974  S.EmitPtr(this);
975  S.EmitInt(MainFileID.getOpaqueValue());
976
977  // Emit: FileInfos.  Just emit the file name.
978  S.EnterBlock();
979
980  // FIXME: Emit FileInfos.
981  //std::for_each(FileInfos.begin(), FileInfos.end(),
982  //              S.MakeEmitter<ContentCache>());
983
984  S.ExitBlock();
985
986  // Emit: MemBufferInfos
987  S.EnterBlock();
988
989  /* FIXME: EMIT.
990  std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
991                S.MakeEmitter<ContentCache>());
992   */
993
994  S.ExitBlock();
995
996  // FIXME: Emit SLocEntryTable.
997
998  S.ExitBlock();
999}
1000
1001SourceManager*
1002SourceManager::CreateAndRegister(llvm::Deserializer &D, FileManager &FMgr) {
1003  SourceManager *M = new SourceManager();
1004  D.RegisterPtr(M);
1005
1006  // Read: the FileID of the main source file of the translation unit.
1007  M->MainFileID = FileID::get(D.ReadInt());
1008
1009  std::vector<char> Buf;
1010
1011  /*{ // FIXME Read: FileInfos.
1012    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
1013    while (!D.FinishedBlock(BLoc))
1014    ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
1015  }*/
1016
1017  { // Read: MemBufferInfos.
1018    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
1019    while (!D.FinishedBlock(BLoc))
1020    ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
1021  }
1022
1023  // FIXME: Read SLocEntryTable.
1024
1025  return M;
1026}
1027