SourceManager.cpp revision 387616edf98739f4a0dd234c907e2b913e6a535d
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/System/Path.h"
19#include "llvm/Bitcode/Serialize.h"
20#include "llvm/Bitcode/Deserialize.h"
21#include "llvm/Support/Streams.h"
22#include <algorithm>
23using namespace clang;
24using namespace SrcMgr;
25using llvm::MemoryBuffer;
26
27//===----------------------------------------------------------------------===//
28// SourceManager Helper Classes
29//===----------------------------------------------------------------------===//
30
31ContentCache::~ContentCache() {
32  delete Buffer;
33}
34
35/// getSizeBytesMapped - Returns the number of bytes actually mapped for
36///  this ContentCache.  This can be 0 if the MemBuffer was not actually
37///  instantiated.
38unsigned ContentCache::getSizeBytesMapped() const {
39  return Buffer ? Buffer->getBufferSize() : 0;
40}
41
42/// getSize - Returns the size of the content encapsulated by this ContentCache.
43///  This can be the size of the source file or the size of an arbitrary
44///  scratch buffer.  If the ContentCache encapsulates a source file, that
45///  file is not lazily brought in from disk to satisfy this query.
46unsigned ContentCache::getSize() const {
47  return Entry ? Entry->getSize() : Buffer->getBufferSize();
48}
49
50const llvm::MemoryBuffer *ContentCache::getBuffer() const {
51  // Lazily create the Buffer for ContentCaches that wrap files.
52  if (!Buffer && Entry) {
53    // FIXME: Should we support a way to not have to do this check over
54    //   and over if we cannot open the file?
55    Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
56  }
57  return Buffer;
58}
59
60//===----------------------------------------------------------------------===//
61// Line Table Implementation
62//===----------------------------------------------------------------------===//
63
64namespace clang {
65struct LineEntry {
66  /// FileOffset - The offset in this file that the line entry occurs at.
67  unsigned FileOffset;
68
69  /// LineNo - The presumed line number of this line entry: #line 4.
70  unsigned LineNo;
71
72  /// FilenameID - The ID of the filename identified by this line entry:
73  /// #line 4 "foo.c".  This is -1 if not specified.
74  int FilenameID;
75
76  /// Flags - Set the 0 if no flags, 1 if a system header,
77  SrcMgr::CharacteristicKind FileKind;
78
79  /// IncludeOffset - This is the offset of the virtual include stack location,
80  /// which is manipulated by GNU linemarker directives.  If this is 0 then
81  /// there is no virtual #includer.
82  unsigned IncludeOffset;
83
84  static LineEntry get(unsigned Offs, unsigned Line, int Filename,
85                       SrcMgr::CharacteristicKind FileKind,
86                       unsigned IncludeOffset) {
87    LineEntry E;
88    E.FileOffset = Offs;
89    E.LineNo = Line;
90    E.FilenameID = Filename;
91    E.FileKind = FileKind;
92    E.IncludeOffset = IncludeOffset;
93    return E;
94  }
95};
96
97// needed for FindNearestLineEntry (upper_bound of LineEntry)
98inline bool operator<(const LineEntry &lhs, const LineEntry &rhs) {
99  // FIXME: should check the other field?
100  return lhs.FileOffset < rhs.FileOffset;
101}
102
103inline bool operator<(const LineEntry &E, unsigned Offset) {
104  return E.FileOffset < Offset;
105}
106
107inline bool operator<(unsigned Offset, const LineEntry &E) {
108  return Offset < E.FileOffset;
109}
110
111/// LineTableInfo - This class is used to hold and unique data used to
112/// represent #line information.
113class LineTableInfo {
114  /// FilenameIDs - This map is used to assign unique IDs to filenames in
115  /// #line directives.  This allows us to unique the filenames that
116  /// frequently reoccur and reference them with indices.  FilenameIDs holds
117  /// the mapping from string -> ID, and FilenamesByID holds the mapping of ID
118  /// to string.
119  llvm::StringMap<unsigned, llvm::BumpPtrAllocator> FilenameIDs;
120  std::vector<llvm::StringMapEntry<unsigned>*> FilenamesByID;
121
122  /// LineEntries - This is a map from FileIDs to a list of line entries (sorted
123  /// by the offset they occur in the file.
124  std::map<unsigned, std::vector<LineEntry> > LineEntries;
125public:
126  LineTableInfo() {
127  }
128
129  void clear() {
130    FilenameIDs.clear();
131    FilenamesByID.clear();
132  }
133
134  ~LineTableInfo() {}
135
136  unsigned getLineTableFilenameID(const char *Ptr, unsigned Len);
137  const char *getFilename(unsigned ID) const {
138    assert(ID < FilenamesByID.size() && "Invalid FilenameID");
139    return FilenamesByID[ID]->getKeyData();
140  }
141
142  void AddLineNote(unsigned FID, unsigned Offset,
143                   unsigned LineNo, int FilenameID);
144  void AddLineNote(unsigned FID, unsigned Offset,
145                   unsigned LineNo, int FilenameID,
146                   unsigned EntryExit, SrcMgr::CharacteristicKind FileKind);
147
148
149  /// FindNearestLineEntry - Find the line entry nearest to FID that is before
150  /// it.  If there is no line entry before Offset in FID, return null.
151  const LineEntry *FindNearestLineEntry(unsigned FID, unsigned Offset);
152};
153} // namespace clang
154
155unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
156  // Look up the filename in the string table, returning the pre-existing value
157  // if it exists.
158  llvm::StringMapEntry<unsigned> &Entry =
159    FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
160  if (Entry.getValue() != ~0U)
161    return Entry.getValue();
162
163  // Otherwise, assign this the next available ID.
164  Entry.setValue(FilenamesByID.size());
165  FilenamesByID.push_back(&Entry);
166  return FilenamesByID.size()-1;
167}
168
169/// AddLineNote - Add a line note to the line table that indicates that there
170/// is a #line at the specified FID/Offset location which changes the presumed
171/// location to LineNo/FilenameID.
172void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
173                                unsigned LineNo, int FilenameID) {
174  std::vector<LineEntry> &Entries = LineEntries[FID];
175
176  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
177         "Adding line entries out of order!");
178
179  SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
180  unsigned IncludeOffset = 0;
181
182  if (!Entries.empty()) {
183    // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
184    // that we are still in "foo.h".
185    if (FilenameID == -1)
186      FilenameID = Entries.back().FilenameID;
187
188    // If we are after a line marker that switched us to system header mode, or
189    // that set #include information, preserve it.
190    Kind = Entries.back().FileKind;
191    IncludeOffset = Entries.back().IncludeOffset;
192  }
193
194  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
195                                   IncludeOffset));
196}
197
198/// AddLineNote This is the same as the previous version of AddLineNote, but is
199/// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
200/// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
201/// this is a file exit.  FileKind specifies whether this is a system header or
202/// extern C system header.
203void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
204                                unsigned LineNo, int FilenameID,
205                                unsigned EntryExit,
206                                SrcMgr::CharacteristicKind FileKind) {
207  assert(FilenameID != -1 && "Unspecified filename should use other accessor");
208
209  std::vector<LineEntry> &Entries = LineEntries[FID];
210
211  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
212         "Adding line entries out of order!");
213
214  unsigned IncludeOffset = 0;
215  if (EntryExit == 0) {  // No #include stack change.
216    IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
217  } else if (EntryExit == 1) {
218    IncludeOffset = Offset-1;
219  } else if (EntryExit == 2) {
220    assert(!Entries.empty() && Entries.back().IncludeOffset &&
221       "PPDirectives should have caught case when popping empty include stack");
222
223    // Get the include loc of the last entries' include loc as our include loc.
224    IncludeOffset = 0;
225    if (const LineEntry *PrevEntry =
226          FindNearestLineEntry(FID, Entries.back().IncludeOffset))
227      IncludeOffset = PrevEntry->IncludeOffset;
228  }
229
230  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
231                                   IncludeOffset));
232}
233
234
235/// FindNearestLineEntry - Find the line entry nearest to FID that is before
236/// it.  If there is no line entry before Offset in FID, return null.
237const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
238                                                     unsigned Offset) {
239  const std::vector<LineEntry> &Entries = LineEntries[FID];
240  assert(!Entries.empty() && "No #line entries for this FID after all!");
241
242  // It is very common for the query to be after the last #line, check this
243  // first.
244  if (Entries.back().FileOffset <= Offset)
245    return &Entries.back();
246
247  // Do a binary search to find the maximal element that is still before Offset.
248  std::vector<LineEntry>::const_iterator I =
249    std::upper_bound(Entries.begin(), Entries.end(), Offset);
250  if (I == Entries.begin()) return 0;
251  return &*--I;
252}
253
254
255/// getLineTableFilenameID - Return the uniqued ID for the specified filename.
256///
257unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
258  if (LineTable == 0)
259    LineTable = new LineTableInfo();
260  return LineTable->getLineTableFilenameID(Ptr, Len);
261}
262
263
264/// AddLineNote - Add a line note to the line table for the FileID and offset
265/// specified by Loc.  If FilenameID is -1, it is considered to be
266/// unspecified.
267void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
268                                int FilenameID) {
269  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
270
271  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
272
273  // Remember that this file has #line directives now if it doesn't already.
274  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
275
276  if (LineTable == 0)
277    LineTable = new LineTableInfo();
278  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
279}
280
281/// AddLineNote - Add a GNU line marker to the line table.
282void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
283                                int FilenameID, bool IsFileEntry,
284                                bool IsFileExit, bool IsSystemHeader,
285                                bool IsExternCHeader) {
286  // If there is no filename and no flags, this is treated just like a #line,
287  // which does not change the flags of the previous line marker.
288  if (FilenameID == -1) {
289    assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
290           "Can't set flags without setting the filename!");
291    return AddLineNote(Loc, LineNo, FilenameID);
292  }
293
294  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
295  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
296
297  // Remember that this file has #line directives now if it doesn't already.
298  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
299
300  if (LineTable == 0)
301    LineTable = new LineTableInfo();
302
303  SrcMgr::CharacteristicKind FileKind;
304  if (IsExternCHeader)
305    FileKind = SrcMgr::C_ExternCSystem;
306  else if (IsSystemHeader)
307    FileKind = SrcMgr::C_System;
308  else
309    FileKind = SrcMgr::C_User;
310
311  unsigned EntryExit = 0;
312  if (IsFileEntry)
313    EntryExit = 1;
314  else if (IsFileExit)
315    EntryExit = 2;
316
317  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
318                         EntryExit, FileKind);
319}
320
321
322//===----------------------------------------------------------------------===//
323// Private 'Create' methods.
324//===----------------------------------------------------------------------===//
325
326SourceManager::~SourceManager() {
327  delete LineTable;
328
329  // Delete FileEntry objects corresponding to content caches.  Since the actual
330  // content cache objects are bump pointer allocated, we just have to run the
331  // dtors, but we call the deallocate method for completeness.
332  for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
333    MemBufferInfos[i]->~ContentCache();
334    ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
335  }
336  for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
337       I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
338    I->second->~ContentCache();
339    ContentCacheAlloc.Deallocate(I->second);
340  }
341}
342
343void SourceManager::clearIDTables() {
344  MainFileID = FileID();
345  SLocEntryTable.clear();
346  LastLineNoFileIDQuery = FileID();
347  LastLineNoContentCache = 0;
348  LastFileIDLookup = FileID();
349
350  if (LineTable)
351    LineTable->clear();
352
353  // Use up FileID #0 as an invalid instantiation.
354  NextOffset = 0;
355  createInstantiationLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
356}
357
358/// getOrCreateContentCache - Create or return a cached ContentCache for the
359/// specified file.
360const ContentCache *
361SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
362  assert(FileEnt && "Didn't specify a file entry to use?");
363
364  // Do we already have information about this file?
365  ContentCache *&Entry = FileInfos[FileEnt];
366  if (Entry) return Entry;
367
368  // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
369  // so that FileInfo can use the low 3 bits of the pointer for its own
370  // nefarious purposes.
371  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
372  EntryAlign = std::max(8U, EntryAlign);
373  Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
374  new (Entry) ContentCache(FileEnt);
375  return Entry;
376}
377
378
379/// createMemBufferContentCache - Create a new ContentCache for the specified
380///  memory buffer.  This does no caching.
381const ContentCache*
382SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
383  // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
384  // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
385  // the pointer for its own nefarious purposes.
386  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
387  EntryAlign = std::max(8U, EntryAlign);
388  ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
389  new (Entry) ContentCache();
390  MemBufferInfos.push_back(Entry);
391  Entry->setBuffer(Buffer);
392  return Entry;
393}
394
395//===----------------------------------------------------------------------===//
396// Methods to create new FileID's and instantiations.
397//===----------------------------------------------------------------------===//
398
399/// createFileID - Create a new fileID for the specified ContentCache and
400/// include position.  This works regardless of whether the ContentCache
401/// corresponds to a file or some other input source.
402FileID SourceManager::createFileID(const ContentCache *File,
403                                   SourceLocation IncludePos,
404                                   SrcMgr::CharacteristicKind FileCharacter) {
405  SLocEntryTable.push_back(SLocEntry::get(NextOffset,
406                                          FileInfo::get(IncludePos, File,
407                                                        FileCharacter)));
408  unsigned FileSize = File->getSize();
409  assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
410  NextOffset += FileSize+1;
411
412  // Set LastFileIDLookup to the newly created file.  The next getFileID call is
413  // almost guaranteed to be from that file.
414  return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1);
415}
416
417/// createInstantiationLoc - Return a new SourceLocation that encodes the fact
418/// that a token from SpellingLoc should actually be referenced from
419/// InstantiationLoc.
420SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
421                                                     SourceLocation ILocStart,
422                                                     SourceLocation ILocEnd,
423                                                     unsigned TokLength) {
424  InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc);
425  SLocEntryTable.push_back(SLocEntry::get(NextOffset, II));
426  assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
427  NextOffset += TokLength+1;
428  return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
429}
430
431/// getBufferData - Return a pointer to the start and end of the source buffer
432/// data for the specified FileID.
433std::pair<const char*, const char*>
434SourceManager::getBufferData(FileID FID) const {
435  const llvm::MemoryBuffer *Buf = getBuffer(FID);
436  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
437}
438
439
440//===----------------------------------------------------------------------===//
441// SourceLocation manipulation methods.
442//===----------------------------------------------------------------------===//
443
444/// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
445/// method that is used for all SourceManager queries that start with a
446/// SourceLocation object.  It is responsible for finding the entry in
447/// SLocEntryTable which contains the specified location.
448///
449FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
450  assert(SLocOffset && "Invalid FileID");
451
452  // After the first and second level caches, I see two common sorts of
453  // behavior: 1) a lot of searched FileID's are "near" the cached file location
454  // or are "near" the cached instantiation location.  2) others are just
455  // completely random and may be a very long way away.
456  //
457  // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
458  // then we fall back to a less cache efficient, but more scalable, binary
459  // search to find the location.
460
461  // See if this is near the file point - worst case we start scanning from the
462  // most newly created FileID.
463  std::vector<SrcMgr::SLocEntry>::const_iterator I;
464
465  if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
466    // Neither loc prunes our search.
467    I = SLocEntryTable.end();
468  } else {
469    // Perhaps it is near the file point.
470    I = SLocEntryTable.begin()+LastFileIDLookup.ID;
471  }
472
473  // Find the FileID that contains this.  "I" is an iterator that points to a
474  // FileID whose offset is known to be larger than SLocOffset.
475  unsigned NumProbes = 0;
476  while (1) {
477    --I;
478    if (I->getOffset() <= SLocOffset) {
479#if 0
480      printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
481             I-SLocEntryTable.begin(),
482             I->isInstantiation() ? "inst" : "file",
483             LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
484#endif
485      FileID Res = FileID::get(I-SLocEntryTable.begin());
486
487      // If this isn't an instantiation, remember it.  We have good locality
488      // across FileID lookups.
489      if (!I->isInstantiation())
490        LastFileIDLookup = Res;
491      NumLinearScans += NumProbes+1;
492      return Res;
493    }
494    if (++NumProbes == 8)
495      break;
496  }
497
498  // Convert "I" back into an index.  We know that it is an entry whose index is
499  // larger than the offset we are looking for.
500  unsigned GreaterIndex = I-SLocEntryTable.begin();
501  // LessIndex - This is the lower bound of the range that we're searching.
502  // We know that the offset corresponding to the FileID is is less than
503  // SLocOffset.
504  unsigned LessIndex = 0;
505  NumProbes = 0;
506  while (1) {
507    unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
508    unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset();
509
510    ++NumProbes;
511
512    // If the offset of the midpoint is too large, chop the high side of the
513    // range to the midpoint.
514    if (MidOffset > SLocOffset) {
515      GreaterIndex = MiddleIndex;
516      continue;
517    }
518
519    // If the middle index contains the value, succeed and return.
520    if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
521#if 0
522      printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
523             I-SLocEntryTable.begin(),
524             I->isInstantiation() ? "inst" : "file",
525             LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
526#endif
527      FileID Res = FileID::get(MiddleIndex);
528
529      // If this isn't an instantiation, remember it.  We have good locality
530      // across FileID lookups.
531      if (!I->isInstantiation())
532        LastFileIDLookup = Res;
533      NumBinaryProbes += NumProbes;
534      return Res;
535    }
536
537    // Otherwise, move the low-side up to the middle index.
538    LessIndex = MiddleIndex;
539  }
540}
541
542SourceLocation SourceManager::
543getInstantiationLocSlowCase(SourceLocation Loc) const {
544  do {
545    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
546    Loc = getSLocEntry(LocInfo.first).getInstantiation()
547                   .getInstantiationLocStart();
548    Loc = Loc.getFileLocWithOffset(LocInfo.second);
549  } while (!Loc.isFileID());
550
551  return Loc;
552}
553
554SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
555  do {
556    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
557    Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
558    Loc = Loc.getFileLocWithOffset(LocInfo.second);
559  } while (!Loc.isFileID());
560  return Loc;
561}
562
563
564std::pair<FileID, unsigned>
565SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
566                                                     unsigned Offset) const {
567  // If this is an instantiation record, walk through all the instantiation
568  // points.
569  FileID FID;
570  SourceLocation Loc;
571  do {
572    Loc = E->getInstantiation().getInstantiationLocStart();
573
574    FID = getFileID(Loc);
575    E = &getSLocEntry(FID);
576    Offset += Loc.getOffset()-E->getOffset();
577  } while (!Loc.isFileID());
578
579  return std::make_pair(FID, Offset);
580}
581
582std::pair<FileID, unsigned>
583SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
584                                                unsigned Offset) const {
585  // If this is an instantiation record, walk through all the instantiation
586  // points.
587  FileID FID;
588  SourceLocation Loc;
589  do {
590    Loc = E->getInstantiation().getSpellingLoc();
591
592    FID = getFileID(Loc);
593    E = &getSLocEntry(FID);
594    Offset += Loc.getOffset()-E->getOffset();
595  } while (!Loc.isFileID());
596
597  return std::make_pair(FID, Offset);
598}
599
600/// getImmediateSpellingLoc - Given a SourceLocation object, return the
601/// spelling location referenced by the ID.  This is the first level down
602/// towards the place where the characters that make up the lexed token can be
603/// found.  This should not generally be used by clients.
604SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{
605  if (Loc.isFileID()) return Loc;
606  std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
607  Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
608  return Loc.getFileLocWithOffset(LocInfo.second);
609}
610
611
612/// getImmediateInstantiationRange - Loc is required to be an instantiation
613/// location.  Return the start/end of the instantiation information.
614std::pair<SourceLocation,SourceLocation>
615SourceManager::getImmediateInstantiationRange(SourceLocation Loc) const {
616  assert(Loc.isMacroID() && "Not an instantiation loc!");
617  const InstantiationInfo &II = getSLocEntry(getFileID(Loc)).getInstantiation();
618  return II.getInstantiationLocRange();
619}
620
621/// getInstantiationRange - Given a SourceLocation object, return the
622/// range of tokens covered by the instantiation in the ultimate file.
623std::pair<SourceLocation,SourceLocation>
624SourceManager::getInstantiationRange(SourceLocation Loc) const {
625  if (Loc.isFileID()) return std::make_pair(Loc, Loc);
626
627  std::pair<SourceLocation,SourceLocation> Res =
628    getImmediateInstantiationRange(Loc);
629
630  // Fully resolve the start and end locations to their ultimate instantiation
631  // points.
632  while (!Res.first.isFileID())
633    Res.first = getImmediateInstantiationRange(Res.first).first;
634  while (!Res.second.isFileID())
635    Res.second = getImmediateInstantiationRange(Res.second).second;
636  return Res;
637}
638
639
640
641//===----------------------------------------------------------------------===//
642// Queries about the code at a SourceLocation.
643//===----------------------------------------------------------------------===//
644
645/// getCharacterData - Return a pointer to the start of the specified location
646/// in the appropriate MemoryBuffer.
647const char *SourceManager::getCharacterData(SourceLocation SL) const {
648  // Note that this is a hot function in the getSpelling() path, which is
649  // heavily used by -E mode.
650  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
651
652  // Note that calling 'getBuffer()' may lazily page in a source file.
653  return getSLocEntry(LocInfo.first).getFile().getContentCache()
654              ->getBuffer()->getBufferStart() + LocInfo.second;
655}
656
657
658/// getColumnNumber - Return the column # for the specified file position.
659/// this is significantly cheaper to compute than the line number.
660unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
661  const char *Buf = getBuffer(FID)->getBufferStart();
662
663  unsigned LineStart = FilePos;
664  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
665    --LineStart;
666  return FilePos-LineStart+1;
667}
668
669unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
670  if (Loc.isInvalid()) return 0;
671  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
672  return getColumnNumber(LocInfo.first, LocInfo.second);
673}
674
675unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
676  if (Loc.isInvalid()) return 0;
677  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
678  return getColumnNumber(LocInfo.first, LocInfo.second);
679}
680
681
682
683static void ComputeLineNumbers(ContentCache* FI,
684                               llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE;
685static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
686  // Note that calling 'getBuffer()' may lazily page in the file.
687  const MemoryBuffer *Buffer = FI->getBuffer();
688
689  // Find the file offsets of all of the *physical* source lines.  This does
690  // not look at trigraphs, escaped newlines, or anything else tricky.
691  std::vector<unsigned> LineOffsets;
692
693  // Line #1 starts at char 0.
694  LineOffsets.push_back(0);
695
696  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
697  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
698  unsigned Offs = 0;
699  while (1) {
700    // Skip over the contents of the line.
701    // TODO: Vectorize this?  This is very performance sensitive for programs
702    // with lots of diagnostics and in -E mode.
703    const unsigned char *NextBuf = (const unsigned char *)Buf;
704    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
705      ++NextBuf;
706    Offs += NextBuf-Buf;
707    Buf = NextBuf;
708
709    if (Buf[0] == '\n' || Buf[0] == '\r') {
710      // If this is \n\r or \r\n, skip both characters.
711      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
712        ++Offs, ++Buf;
713      ++Offs, ++Buf;
714      LineOffsets.push_back(Offs);
715    } else {
716      // Otherwise, this is a null.  If end of file, exit.
717      if (Buf == End) break;
718      // Otherwise, skip the null.
719      ++Offs, ++Buf;
720    }
721  }
722
723  // Copy the offsets into the FileInfo structure.
724  FI->NumLines = LineOffsets.size();
725  FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
726  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
727}
728
729/// getLineNumber - Given a SourceLocation, return the spelling line number
730/// for the position indicated.  This requires building and caching a table of
731/// line offsets for the MemoryBuffer, so this is not cheap: use only when
732/// about to emit a diagnostic.
733unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
734  ContentCache *Content;
735  if (LastLineNoFileIDQuery == FID)
736    Content = LastLineNoContentCache;
737  else
738    Content = const_cast<ContentCache*>(getSLocEntry(FID)
739                                        .getFile().getContentCache());
740
741  // If this is the first use of line information for this buffer, compute the
742  /// SourceLineCache for it on demand.
743  if (Content->SourceLineCache == 0)
744    ComputeLineNumbers(Content, ContentCacheAlloc);
745
746  // Okay, we know we have a line number table.  Do a binary search to find the
747  // line number that this character position lands on.
748  unsigned *SourceLineCache = Content->SourceLineCache;
749  unsigned *SourceLineCacheStart = SourceLineCache;
750  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
751
752  unsigned QueriedFilePos = FilePos+1;
753
754  // If the previous query was to the same file, we know both the file pos from
755  // that query and the line number returned.  This allows us to narrow the
756  // search space from the entire file to something near the match.
757  if (LastLineNoFileIDQuery == FID) {
758    if (QueriedFilePos >= LastLineNoFilePos) {
759      SourceLineCache = SourceLineCache+LastLineNoResult-1;
760
761      // The query is likely to be nearby the previous one.  Here we check to
762      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
763      // where big comment blocks and vertical whitespace eat up lines but
764      // contribute no tokens.
765      if (SourceLineCache+5 < SourceLineCacheEnd) {
766        if (SourceLineCache[5] > QueriedFilePos)
767          SourceLineCacheEnd = SourceLineCache+5;
768        else if (SourceLineCache+10 < SourceLineCacheEnd) {
769          if (SourceLineCache[10] > QueriedFilePos)
770            SourceLineCacheEnd = SourceLineCache+10;
771          else if (SourceLineCache+20 < SourceLineCacheEnd) {
772            if (SourceLineCache[20] > QueriedFilePos)
773              SourceLineCacheEnd = SourceLineCache+20;
774          }
775        }
776      }
777    } else {
778      SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
779    }
780  }
781
782  // If the spread is large, do a "radix" test as our initial guess, based on
783  // the assumption that lines average to approximately the same length.
784  // NOTE: This is currently disabled, as it does not appear to be profitable in
785  // initial measurements.
786  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
787    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
788
789    // Take a stab at guessing where it is.
790    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
791
792    // Check for -10 and +10 lines.
793    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
794    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
795
796    // If the computed lower bound is less than the query location, move it in.
797    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
798        SourceLineCacheStart[LowerBound] < QueriedFilePos)
799      SourceLineCache = SourceLineCacheStart+LowerBound;
800
801    // If the computed upper bound is greater than the query location, move it.
802    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
803        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
804      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
805  }
806
807  unsigned *Pos
808    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
809  unsigned LineNo = Pos-SourceLineCacheStart;
810
811  LastLineNoFileIDQuery = FID;
812  LastLineNoContentCache = Content;
813  LastLineNoFilePos = QueriedFilePos;
814  LastLineNoResult = LineNo;
815  return LineNo;
816}
817
818unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
819  if (Loc.isInvalid()) return 0;
820  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
821  return getLineNumber(LocInfo.first, LocInfo.second);
822}
823unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
824  if (Loc.isInvalid()) return 0;
825  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
826  return getLineNumber(LocInfo.first, LocInfo.second);
827}
828
829/// getFileCharacteristic - return the file characteristic of the specified
830/// source location, indicating whether this is a normal file, a system
831/// header, or an "implicit extern C" system header.
832///
833/// This state can be modified with flags on GNU linemarker directives like:
834///   # 4 "foo.h" 3
835/// which changes all source locations in the current file after that to be
836/// considered to be from a system header.
837SrcMgr::CharacteristicKind
838SourceManager::getFileCharacteristic(SourceLocation Loc) const {
839  assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
840  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
841  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
842
843  // If there are no #line directives in this file, just return the whole-file
844  // state.
845  if (!FI.hasLineDirectives())
846    return FI.getFileCharacteristic();
847
848  assert(LineTable && "Can't have linetable entries without a LineTable!");
849  // See if there is a #line directive before the location.
850  const LineEntry *Entry =
851    LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
852
853  // If this is before the first line marker, use the file characteristic.
854  if (!Entry)
855    return FI.getFileCharacteristic();
856
857  return Entry->FileKind;
858}
859
860
861/// getPresumedLoc - This method returns the "presumed" location of a
862/// SourceLocation specifies.  A "presumed location" can be modified by #line
863/// or GNU line marker directives.  This provides a view on the data that a
864/// user should see in diagnostics, for example.
865///
866/// Note that a presumed location is always given as the instantiation point
867/// of an instantiation location, not at the spelling location.
868PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
869  if (Loc.isInvalid()) return PresumedLoc();
870
871  // Presumed locations are always for instantiation points.
872  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
873
874  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
875  const SrcMgr::ContentCache *C = FI.getContentCache();
876
877  // To get the source name, first consult the FileEntry (if one exists)
878  // before the MemBuffer as this will avoid unnecessarily paging in the
879  // MemBuffer.
880  const char *Filename =
881    C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
882  unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
883  unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
884  SourceLocation IncludeLoc = FI.getIncludeLoc();
885
886  // If we have #line directives in this file, update and overwrite the physical
887  // location info if appropriate.
888  if (FI.hasLineDirectives()) {
889    assert(LineTable && "Can't have linetable entries without a LineTable!");
890    // See if there is a #line directive before this.  If so, get it.
891    if (const LineEntry *Entry =
892          LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
893      // If the LineEntry indicates a filename, use it.
894      if (Entry->FilenameID != -1)
895        Filename = LineTable->getFilename(Entry->FilenameID);
896
897      // Use the line number specified by the LineEntry.  This line number may
898      // be multiple lines down from the line entry.  Add the difference in
899      // physical line numbers from the query point and the line marker to the
900      // total.
901      unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
902      LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
903
904      // Note that column numbers are not molested by line markers.
905
906      // Handle virtual #include manipulation.
907      if (Entry->IncludeOffset) {
908        IncludeLoc = getLocForStartOfFile(LocInfo.first);
909        IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset);
910      }
911    }
912  }
913
914  return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
915}
916
917//===----------------------------------------------------------------------===//
918// Other miscellaneous methods.
919//===----------------------------------------------------------------------===//
920
921
922/// PrintStats - Print statistics to stderr.
923///
924void SourceManager::PrintStats() const {
925  llvm::cerr << "\n*** Source Manager Stats:\n";
926  llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
927             << " mem buffers mapped.\n";
928  llvm::cerr << SLocEntryTable.size() << " SLocEntry's allocated, "
929             << NextOffset << "B of Sloc address space used.\n";
930
931  unsigned NumLineNumsComputed = 0;
932  unsigned NumFileBytesMapped = 0;
933  for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
934    NumLineNumsComputed += I->second->SourceLineCache != 0;
935    NumFileBytesMapped  += I->second->getSizeBytesMapped();
936  }
937
938  llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
939             << NumLineNumsComputed << " files with line #'s computed.\n";
940  llvm::cerr << "FileID scans: " << NumLinearScans << " linear, "
941             << NumBinaryProbes << " binary.\n";
942}
943
944//===----------------------------------------------------------------------===//
945// Serialization.
946//===----------------------------------------------------------------------===//
947
948void ContentCache::Emit(llvm::Serializer& S) const {
949  S.FlushRecord();
950  S.EmitPtr(this);
951
952  if (Entry) {
953    llvm::sys::Path Fname(Buffer->getBufferIdentifier());
954
955    if (Fname.isAbsolute())
956      S.EmitCStr(Fname.c_str());
957    else {
958      // Create an absolute path.
959      // FIXME: This will potentially contain ".." and "." in the path.
960      llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
961      path.appendComponent(Fname.c_str());
962      S.EmitCStr(path.c_str());
963    }
964  }
965  else {
966    const char* p = Buffer->getBufferStart();
967    const char* e = Buffer->getBufferEnd();
968
969    S.EmitInt(e-p);
970
971    for ( ; p != e; ++p)
972      S.EmitInt(*p);
973  }
974
975  S.FlushRecord();
976}
977
978void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
979                                       SourceManager& SMgr,
980                                       FileManager* FMgr,
981                                       std::vector<char>& Buf) {
982  if (FMgr) {
983    llvm::SerializedPtrID PtrID = D.ReadPtrID();
984    D.ReadCStr(Buf,false);
985
986    // Create/fetch the FileEntry.
987    const char* start = &Buf[0];
988    const FileEntry* E = FMgr->getFile(start,start+Buf.size());
989
990    // FIXME: Ideally we want a lazy materialization of the ContentCache
991    //  anyway, because we don't want to read in source files unless this
992    //  is absolutely needed.
993    if (!E)
994      D.RegisterPtr(PtrID,NULL);
995    else
996      // Get the ContextCache object and register it with the deserializer.
997      D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E));
998    return;
999  }
1000
1001  // Register the ContextCache object with the deserializer.
1002  /* FIXME:
1003  ContentCache *Entry
1004  SMgr.MemBufferInfos.push_back(ContentCache());
1005   = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
1006  D.RegisterPtr(&Entry);
1007
1008  // Create the buffer.
1009  unsigned Size = D.ReadInt();
1010  Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
1011
1012  // Read the contents of the buffer.
1013  char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
1014  for (unsigned i = 0; i < Size ; ++i)
1015    p[i] = D.ReadInt();
1016   */
1017}
1018
1019void SourceManager::Emit(llvm::Serializer& S) const {
1020  S.EnterBlock();
1021  S.EmitPtr(this);
1022  S.EmitInt(MainFileID.getOpaqueValue());
1023
1024  // Emit: FileInfos.  Just emit the file name.
1025  S.EnterBlock();
1026
1027  // FIXME: Emit FileInfos.
1028  //std::for_each(FileInfos.begin(), FileInfos.end(),
1029  //              S.MakeEmitter<ContentCache>());
1030
1031  S.ExitBlock();
1032
1033  // Emit: MemBufferInfos
1034  S.EnterBlock();
1035
1036  /* FIXME: EMIT.
1037  std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
1038                S.MakeEmitter<ContentCache>());
1039   */
1040
1041  S.ExitBlock();
1042
1043  // FIXME: Emit SLocEntryTable.
1044
1045  S.ExitBlock();
1046}
1047
1048SourceManager*
1049SourceManager::CreateAndRegister(llvm::Deserializer &D, FileManager &FMgr) {
1050  SourceManager *M = new SourceManager();
1051  D.RegisterPtr(M);
1052
1053  // Read: the FileID of the main source file of the translation unit.
1054  M->MainFileID = FileID::get(D.ReadInt());
1055
1056  std::vector<char> Buf;
1057
1058  /*{ // FIXME Read: FileInfos.
1059    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
1060    while (!D.FinishedBlock(BLoc))
1061    ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
1062  }*/
1063
1064  { // Read: MemBufferInfos.
1065    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
1066    while (!D.FinishedBlock(BLoc))
1067    ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
1068  }
1069
1070  // FIXME: Read SLocEntryTable.
1071
1072  return M;
1073}
1074