SourceManager.cpp revision bd94500d3aa60092fb0f1e90f53fb0d03fa502a8
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/SourceManagerInternals.h"
16#include "clang/Basic/FileManager.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/MemoryBuffer.h"
19#include "llvm/System/Path.h"
20#include "llvm/Bitcode/Serialize.h"
21#include "llvm/Bitcode/Deserialize.h"
22#include "llvm/Support/Streams.h"
23#include <algorithm>
24using namespace clang;
25using namespace SrcMgr;
26using llvm::MemoryBuffer;
27
28//===----------------------------------------------------------------------===//
29// SourceManager Helper Classes
30//===----------------------------------------------------------------------===//
31
32ContentCache::~ContentCache() {
33  delete Buffer;
34}
35
36/// getSizeBytesMapped - Returns the number of bytes actually mapped for
37///  this ContentCache.  This can be 0 if the MemBuffer was not actually
38///  instantiated.
39unsigned ContentCache::getSizeBytesMapped() const {
40  return Buffer ? Buffer->getBufferSize() : 0;
41}
42
43/// getSize - Returns the size of the content encapsulated by this ContentCache.
44///  This can be the size of the source file or the size of an arbitrary
45///  scratch buffer.  If the ContentCache encapsulates a source file, that
46///  file is not lazily brought in from disk to satisfy this query.
47unsigned ContentCache::getSize() const {
48  return Entry ? Entry->getSize() : Buffer->getBufferSize();
49}
50
51const llvm::MemoryBuffer *ContentCache::getBuffer() const {
52  // Lazily create the Buffer for ContentCaches that wrap files.
53  if (!Buffer && Entry) {
54    // FIXME: Should we support a way to not have to do this check over
55    //   and over if we cannot open the file?
56    Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
57  }
58  return Buffer;
59}
60
61unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
62  // Look up the filename in the string table, returning the pre-existing value
63  // if it exists.
64  llvm::StringMapEntry<unsigned> &Entry =
65    FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
66  if (Entry.getValue() != ~0U)
67    return Entry.getValue();
68
69  // Otherwise, assign this the next available ID.
70  Entry.setValue(FilenamesByID.size());
71  FilenamesByID.push_back(&Entry);
72  return FilenamesByID.size()-1;
73}
74
75/// AddLineNote - Add a line note to the line table that indicates that there
76/// is a #line at the specified FID/Offset location which changes the presumed
77/// location to LineNo/FilenameID.
78void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
79                                unsigned LineNo, int FilenameID) {
80  std::vector<LineEntry> &Entries = LineEntries[FID];
81
82  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
83         "Adding line entries out of order!");
84
85  SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
86  unsigned IncludeOffset = 0;
87
88  if (!Entries.empty()) {
89    // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
90    // that we are still in "foo.h".
91    if (FilenameID == -1)
92      FilenameID = Entries.back().FilenameID;
93
94    // If we are after a line marker that switched us to system header mode, or
95    // that set #include information, preserve it.
96    Kind = Entries.back().FileKind;
97    IncludeOffset = Entries.back().IncludeOffset;
98  }
99
100  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
101                                   IncludeOffset));
102}
103
104/// AddLineNote This is the same as the previous version of AddLineNote, but is
105/// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
106/// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
107/// this is a file exit.  FileKind specifies whether this is a system header or
108/// extern C system header.
109void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
110                                unsigned LineNo, int FilenameID,
111                                unsigned EntryExit,
112                                SrcMgr::CharacteristicKind FileKind) {
113  assert(FilenameID != -1 && "Unspecified filename should use other accessor");
114
115  std::vector<LineEntry> &Entries = LineEntries[FID];
116
117  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
118         "Adding line entries out of order!");
119
120  unsigned IncludeOffset = 0;
121  if (EntryExit == 0) {  // No #include stack change.
122    IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
123  } else if (EntryExit == 1) {
124    IncludeOffset = Offset-1;
125  } else if (EntryExit == 2) {
126    assert(!Entries.empty() && Entries.back().IncludeOffset &&
127       "PPDirectives should have caught case when popping empty include stack");
128
129    // Get the include loc of the last entries' include loc as our include loc.
130    IncludeOffset = 0;
131    if (const LineEntry *PrevEntry =
132          FindNearestLineEntry(FID, Entries.back().IncludeOffset))
133      IncludeOffset = PrevEntry->IncludeOffset;
134  }
135
136  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
137                                   IncludeOffset));
138}
139
140
141/// FindNearestLineEntry - Find the line entry nearest to FID that is before
142/// it.  If there is no line entry before Offset in FID, return null.
143const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
144                                                     unsigned Offset) {
145  const std::vector<LineEntry> &Entries = LineEntries[FID];
146  assert(!Entries.empty() && "No #line entries for this FID after all!");
147
148  // It is very common for the query to be after the last #line, check this
149  // first.
150  if (Entries.back().FileOffset <= Offset)
151    return &Entries.back();
152
153  // Do a binary search to find the maximal element that is still before Offset.
154  std::vector<LineEntry>::const_iterator I =
155    std::upper_bound(Entries.begin(), Entries.end(), Offset);
156  if (I == Entries.begin()) return 0;
157  return &*--I;
158}
159
160/// \brief Add a new line entry that has already been encoded into
161/// the internal representation of the line table.
162void LineTableInfo::AddEntry(unsigned FID,
163                             const std::vector<LineEntry> &Entries) {
164  LineEntries[FID] = Entries;
165}
166
167/// getLineTableFilenameID - Return the uniqued ID for the specified filename.
168///
169unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
170  if (LineTable == 0)
171    LineTable = new LineTableInfo();
172  return LineTable->getLineTableFilenameID(Ptr, Len);
173}
174
175
176/// AddLineNote - Add a line note to the line table for the FileID and offset
177/// specified by Loc.  If FilenameID is -1, it is considered to be
178/// unspecified.
179void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
180                                int FilenameID) {
181  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
182
183  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
184
185  // Remember that this file has #line directives now if it doesn't already.
186  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
187
188  if (LineTable == 0)
189    LineTable = new LineTableInfo();
190  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
191}
192
193/// AddLineNote - Add a GNU line marker to the line table.
194void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
195                                int FilenameID, bool IsFileEntry,
196                                bool IsFileExit, bool IsSystemHeader,
197                                bool IsExternCHeader) {
198  // If there is no filename and no flags, this is treated just like a #line,
199  // which does not change the flags of the previous line marker.
200  if (FilenameID == -1) {
201    assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
202           "Can't set flags without setting the filename!");
203    return AddLineNote(Loc, LineNo, FilenameID);
204  }
205
206  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
207  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
208
209  // Remember that this file has #line directives now if it doesn't already.
210  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
211
212  if (LineTable == 0)
213    LineTable = new LineTableInfo();
214
215  SrcMgr::CharacteristicKind FileKind;
216  if (IsExternCHeader)
217    FileKind = SrcMgr::C_ExternCSystem;
218  else if (IsSystemHeader)
219    FileKind = SrcMgr::C_System;
220  else
221    FileKind = SrcMgr::C_User;
222
223  unsigned EntryExit = 0;
224  if (IsFileEntry)
225    EntryExit = 1;
226  else if (IsFileExit)
227    EntryExit = 2;
228
229  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
230                         EntryExit, FileKind);
231}
232
233LineTableInfo &SourceManager::getLineTable() {
234  if (LineTable == 0)
235    LineTable = new LineTableInfo();
236  return *LineTable;
237}
238
239//===----------------------------------------------------------------------===//
240// Private 'Create' methods.
241//===----------------------------------------------------------------------===//
242
243SourceManager::~SourceManager() {
244  delete LineTable;
245
246  // Delete FileEntry objects corresponding to content caches.  Since the actual
247  // content cache objects are bump pointer allocated, we just have to run the
248  // dtors, but we call the deallocate method for completeness.
249  for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
250    MemBufferInfos[i]->~ContentCache();
251    ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
252  }
253  for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
254       I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
255    I->second->~ContentCache();
256    ContentCacheAlloc.Deallocate(I->second);
257  }
258}
259
260void SourceManager::clearIDTables() {
261  MainFileID = FileID();
262  SLocEntryTable.clear();
263  LastLineNoFileIDQuery = FileID();
264  LastLineNoContentCache = 0;
265  LastFileIDLookup = FileID();
266
267  if (LineTable)
268    LineTable->clear();
269
270  // Use up FileID #0 as an invalid instantiation.
271  NextOffset = 0;
272  createInstantiationLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
273}
274
275/// getOrCreateContentCache - Create or return a cached ContentCache for the
276/// specified file.
277const ContentCache *
278SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
279  assert(FileEnt && "Didn't specify a file entry to use?");
280
281  // Do we already have information about this file?
282  ContentCache *&Entry = FileInfos[FileEnt];
283  if (Entry) return Entry;
284
285  // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
286  // so that FileInfo can use the low 3 bits of the pointer for its own
287  // nefarious purposes.
288  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
289  EntryAlign = std::max(8U, EntryAlign);
290  Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
291  new (Entry) ContentCache(FileEnt);
292  return Entry;
293}
294
295
296/// createMemBufferContentCache - Create a new ContentCache for the specified
297///  memory buffer.  This does no caching.
298const ContentCache*
299SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
300  // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
301  // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
302  // the pointer for its own nefarious purposes.
303  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
304  EntryAlign = std::max(8U, EntryAlign);
305  ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
306  new (Entry) ContentCache();
307  MemBufferInfos.push_back(Entry);
308  Entry->setBuffer(Buffer);
309  return Entry;
310}
311
312//===----------------------------------------------------------------------===//
313// Methods to create new FileID's and instantiations.
314//===----------------------------------------------------------------------===//
315
316/// createFileID - Create a new fileID for the specified ContentCache and
317/// include position.  This works regardless of whether the ContentCache
318/// corresponds to a file or some other input source.
319FileID SourceManager::createFileID(const ContentCache *File,
320                                   SourceLocation IncludePos,
321                                   SrcMgr::CharacteristicKind FileCharacter) {
322  SLocEntryTable.push_back(SLocEntry::get(NextOffset,
323                                          FileInfo::get(IncludePos, File,
324                                                        FileCharacter)));
325  unsigned FileSize = File->getSize();
326  assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
327  NextOffset += FileSize+1;
328
329  // Set LastFileIDLookup to the newly created file.  The next getFileID call is
330  // almost guaranteed to be from that file.
331  return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1);
332}
333
334/// createInstantiationLoc - Return a new SourceLocation that encodes the fact
335/// that a token from SpellingLoc should actually be referenced from
336/// InstantiationLoc.
337SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
338                                                     SourceLocation ILocStart,
339                                                     SourceLocation ILocEnd,
340                                                     unsigned TokLength) {
341  InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc);
342  SLocEntryTable.push_back(SLocEntry::get(NextOffset, II));
343  assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
344  NextOffset += TokLength+1;
345  return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
346}
347
348/// getBufferData - Return a pointer to the start and end of the source buffer
349/// data for the specified FileID.
350std::pair<const char*, const char*>
351SourceManager::getBufferData(FileID FID) const {
352  const llvm::MemoryBuffer *Buf = getBuffer(FID);
353  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
354}
355
356
357//===----------------------------------------------------------------------===//
358// SourceLocation manipulation methods.
359//===----------------------------------------------------------------------===//
360
361/// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
362/// method that is used for all SourceManager queries that start with a
363/// SourceLocation object.  It is responsible for finding the entry in
364/// SLocEntryTable which contains the specified location.
365///
366FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
367  assert(SLocOffset && "Invalid FileID");
368
369  // After the first and second level caches, I see two common sorts of
370  // behavior: 1) a lot of searched FileID's are "near" the cached file location
371  // or are "near" the cached instantiation location.  2) others are just
372  // completely random and may be a very long way away.
373  //
374  // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
375  // then we fall back to a less cache efficient, but more scalable, binary
376  // search to find the location.
377
378  // See if this is near the file point - worst case we start scanning from the
379  // most newly created FileID.
380  std::vector<SrcMgr::SLocEntry>::const_iterator I;
381
382  if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
383    // Neither loc prunes our search.
384    I = SLocEntryTable.end();
385  } else {
386    // Perhaps it is near the file point.
387    I = SLocEntryTable.begin()+LastFileIDLookup.ID;
388  }
389
390  // Find the FileID that contains this.  "I" is an iterator that points to a
391  // FileID whose offset is known to be larger than SLocOffset.
392  unsigned NumProbes = 0;
393  while (1) {
394    --I;
395    if (I->getOffset() <= SLocOffset) {
396#if 0
397      printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
398             I-SLocEntryTable.begin(),
399             I->isInstantiation() ? "inst" : "file",
400             LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
401#endif
402      FileID Res = FileID::get(I-SLocEntryTable.begin());
403
404      // If this isn't an instantiation, remember it.  We have good locality
405      // across FileID lookups.
406      if (!I->isInstantiation())
407        LastFileIDLookup = Res;
408      NumLinearScans += NumProbes+1;
409      return Res;
410    }
411    if (++NumProbes == 8)
412      break;
413  }
414
415  // Convert "I" back into an index.  We know that it is an entry whose index is
416  // larger than the offset we are looking for.
417  unsigned GreaterIndex = I-SLocEntryTable.begin();
418  // LessIndex - This is the lower bound of the range that we're searching.
419  // We know that the offset corresponding to the FileID is is less than
420  // SLocOffset.
421  unsigned LessIndex = 0;
422  NumProbes = 0;
423  while (1) {
424    unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
425    unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset();
426
427    ++NumProbes;
428
429    // If the offset of the midpoint is too large, chop the high side of the
430    // range to the midpoint.
431    if (MidOffset > SLocOffset) {
432      GreaterIndex = MiddleIndex;
433      continue;
434    }
435
436    // If the middle index contains the value, succeed and return.
437    if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
438#if 0
439      printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
440             I-SLocEntryTable.begin(),
441             I->isInstantiation() ? "inst" : "file",
442             LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
443#endif
444      FileID Res = FileID::get(MiddleIndex);
445
446      // If this isn't an instantiation, remember it.  We have good locality
447      // across FileID lookups.
448      if (!I->isInstantiation())
449        LastFileIDLookup = Res;
450      NumBinaryProbes += NumProbes;
451      return Res;
452    }
453
454    // Otherwise, move the low-side up to the middle index.
455    LessIndex = MiddleIndex;
456  }
457}
458
459SourceLocation SourceManager::
460getInstantiationLocSlowCase(SourceLocation Loc) const {
461  do {
462    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
463    Loc = getSLocEntry(LocInfo.first).getInstantiation()
464                   .getInstantiationLocStart();
465    Loc = Loc.getFileLocWithOffset(LocInfo.second);
466  } while (!Loc.isFileID());
467
468  return Loc;
469}
470
471SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
472  do {
473    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
474    Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
475    Loc = Loc.getFileLocWithOffset(LocInfo.second);
476  } while (!Loc.isFileID());
477  return Loc;
478}
479
480
481std::pair<FileID, unsigned>
482SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
483                                                     unsigned Offset) const {
484  // If this is an instantiation record, walk through all the instantiation
485  // points.
486  FileID FID;
487  SourceLocation Loc;
488  do {
489    Loc = E->getInstantiation().getInstantiationLocStart();
490
491    FID = getFileID(Loc);
492    E = &getSLocEntry(FID);
493    Offset += Loc.getOffset()-E->getOffset();
494  } while (!Loc.isFileID());
495
496  return std::make_pair(FID, Offset);
497}
498
499std::pair<FileID, unsigned>
500SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
501                                                unsigned Offset) const {
502  // If this is an instantiation record, walk through all the instantiation
503  // points.
504  FileID FID;
505  SourceLocation Loc;
506  do {
507    Loc = E->getInstantiation().getSpellingLoc();
508
509    FID = getFileID(Loc);
510    E = &getSLocEntry(FID);
511    Offset += Loc.getOffset()-E->getOffset();
512  } while (!Loc.isFileID());
513
514  return std::make_pair(FID, Offset);
515}
516
517/// getImmediateSpellingLoc - Given a SourceLocation object, return the
518/// spelling location referenced by the ID.  This is the first level down
519/// towards the place where the characters that make up the lexed token can be
520/// found.  This should not generally be used by clients.
521SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{
522  if (Loc.isFileID()) return Loc;
523  std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
524  Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
525  return Loc.getFileLocWithOffset(LocInfo.second);
526}
527
528
529/// getImmediateInstantiationRange - Loc is required to be an instantiation
530/// location.  Return the start/end of the instantiation information.
531std::pair<SourceLocation,SourceLocation>
532SourceManager::getImmediateInstantiationRange(SourceLocation Loc) const {
533  assert(Loc.isMacroID() && "Not an instantiation loc!");
534  const InstantiationInfo &II = getSLocEntry(getFileID(Loc)).getInstantiation();
535  return II.getInstantiationLocRange();
536}
537
538/// getInstantiationRange - Given a SourceLocation object, return the
539/// range of tokens covered by the instantiation in the ultimate file.
540std::pair<SourceLocation,SourceLocation>
541SourceManager::getInstantiationRange(SourceLocation Loc) const {
542  if (Loc.isFileID()) return std::make_pair(Loc, Loc);
543
544  std::pair<SourceLocation,SourceLocation> Res =
545    getImmediateInstantiationRange(Loc);
546
547  // Fully resolve the start and end locations to their ultimate instantiation
548  // points.
549  while (!Res.first.isFileID())
550    Res.first = getImmediateInstantiationRange(Res.first).first;
551  while (!Res.second.isFileID())
552    Res.second = getImmediateInstantiationRange(Res.second).second;
553  return Res;
554}
555
556
557
558//===----------------------------------------------------------------------===//
559// Queries about the code at a SourceLocation.
560//===----------------------------------------------------------------------===//
561
562/// getCharacterData - Return a pointer to the start of the specified location
563/// in the appropriate MemoryBuffer.
564const char *SourceManager::getCharacterData(SourceLocation SL) const {
565  // Note that this is a hot function in the getSpelling() path, which is
566  // heavily used by -E mode.
567  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
568
569  // Note that calling 'getBuffer()' may lazily page in a source file.
570  return getSLocEntry(LocInfo.first).getFile().getContentCache()
571              ->getBuffer()->getBufferStart() + LocInfo.second;
572}
573
574
575/// getColumnNumber - Return the column # for the specified file position.
576/// this is significantly cheaper to compute than the line number.
577unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
578  const char *Buf = getBuffer(FID)->getBufferStart();
579
580  unsigned LineStart = FilePos;
581  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
582    --LineStart;
583  return FilePos-LineStart+1;
584}
585
586unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
587  if (Loc.isInvalid()) return 0;
588  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
589  return getColumnNumber(LocInfo.first, LocInfo.second);
590}
591
592unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
593  if (Loc.isInvalid()) return 0;
594  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
595  return getColumnNumber(LocInfo.first, LocInfo.second);
596}
597
598
599
600static void ComputeLineNumbers(ContentCache* FI,
601                               llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE;
602static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
603  // Note that calling 'getBuffer()' may lazily page in the file.
604  const MemoryBuffer *Buffer = FI->getBuffer();
605
606  // Find the file offsets of all of the *physical* source lines.  This does
607  // not look at trigraphs, escaped newlines, or anything else tricky.
608  std::vector<unsigned> LineOffsets;
609
610  // Line #1 starts at char 0.
611  LineOffsets.push_back(0);
612
613  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
614  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
615  unsigned Offs = 0;
616  while (1) {
617    // Skip over the contents of the line.
618    // TODO: Vectorize this?  This is very performance sensitive for programs
619    // with lots of diagnostics and in -E mode.
620    const unsigned char *NextBuf = (const unsigned char *)Buf;
621    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
622      ++NextBuf;
623    Offs += NextBuf-Buf;
624    Buf = NextBuf;
625
626    if (Buf[0] == '\n' || Buf[0] == '\r') {
627      // If this is \n\r or \r\n, skip both characters.
628      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
629        ++Offs, ++Buf;
630      ++Offs, ++Buf;
631      LineOffsets.push_back(Offs);
632    } else {
633      // Otherwise, this is a null.  If end of file, exit.
634      if (Buf == End) break;
635      // Otherwise, skip the null.
636      ++Offs, ++Buf;
637    }
638  }
639
640  // Copy the offsets into the FileInfo structure.
641  FI->NumLines = LineOffsets.size();
642  FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
643  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
644}
645
646/// getLineNumber - Given a SourceLocation, return the spelling line number
647/// for the position indicated.  This requires building and caching a table of
648/// line offsets for the MemoryBuffer, so this is not cheap: use only when
649/// about to emit a diagnostic.
650unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
651  ContentCache *Content;
652  if (LastLineNoFileIDQuery == FID)
653    Content = LastLineNoContentCache;
654  else
655    Content = const_cast<ContentCache*>(getSLocEntry(FID)
656                                        .getFile().getContentCache());
657
658  // If this is the first use of line information for this buffer, compute the
659  /// SourceLineCache for it on demand.
660  if (Content->SourceLineCache == 0)
661    ComputeLineNumbers(Content, ContentCacheAlloc);
662
663  // Okay, we know we have a line number table.  Do a binary search to find the
664  // line number that this character position lands on.
665  unsigned *SourceLineCache = Content->SourceLineCache;
666  unsigned *SourceLineCacheStart = SourceLineCache;
667  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
668
669  unsigned QueriedFilePos = FilePos+1;
670
671  // If the previous query was to the same file, we know both the file pos from
672  // that query and the line number returned.  This allows us to narrow the
673  // search space from the entire file to something near the match.
674  if (LastLineNoFileIDQuery == FID) {
675    if (QueriedFilePos >= LastLineNoFilePos) {
676      SourceLineCache = SourceLineCache+LastLineNoResult-1;
677
678      // The query is likely to be nearby the previous one.  Here we check to
679      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
680      // where big comment blocks and vertical whitespace eat up lines but
681      // contribute no tokens.
682      if (SourceLineCache+5 < SourceLineCacheEnd) {
683        if (SourceLineCache[5] > QueriedFilePos)
684          SourceLineCacheEnd = SourceLineCache+5;
685        else if (SourceLineCache+10 < SourceLineCacheEnd) {
686          if (SourceLineCache[10] > QueriedFilePos)
687            SourceLineCacheEnd = SourceLineCache+10;
688          else if (SourceLineCache+20 < SourceLineCacheEnd) {
689            if (SourceLineCache[20] > QueriedFilePos)
690              SourceLineCacheEnd = SourceLineCache+20;
691          }
692        }
693      }
694    } else {
695      SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
696    }
697  }
698
699  // If the spread is large, do a "radix" test as our initial guess, based on
700  // the assumption that lines average to approximately the same length.
701  // NOTE: This is currently disabled, as it does not appear to be profitable in
702  // initial measurements.
703  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
704    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
705
706    // Take a stab at guessing where it is.
707    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
708
709    // Check for -10 and +10 lines.
710    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
711    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
712
713    // If the computed lower bound is less than the query location, move it in.
714    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
715        SourceLineCacheStart[LowerBound] < QueriedFilePos)
716      SourceLineCache = SourceLineCacheStart+LowerBound;
717
718    // If the computed upper bound is greater than the query location, move it.
719    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
720        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
721      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
722  }
723
724  unsigned *Pos
725    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
726  unsigned LineNo = Pos-SourceLineCacheStart;
727
728  LastLineNoFileIDQuery = FID;
729  LastLineNoContentCache = Content;
730  LastLineNoFilePos = QueriedFilePos;
731  LastLineNoResult = LineNo;
732  return LineNo;
733}
734
735unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
736  if (Loc.isInvalid()) return 0;
737  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
738  return getLineNumber(LocInfo.first, LocInfo.second);
739}
740unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
741  if (Loc.isInvalid()) return 0;
742  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
743  return getLineNumber(LocInfo.first, LocInfo.second);
744}
745
746/// getFileCharacteristic - return the file characteristic of the specified
747/// source location, indicating whether this is a normal file, a system
748/// header, or an "implicit extern C" system header.
749///
750/// This state can be modified with flags on GNU linemarker directives like:
751///   # 4 "foo.h" 3
752/// which changes all source locations in the current file after that to be
753/// considered to be from a system header.
754SrcMgr::CharacteristicKind
755SourceManager::getFileCharacteristic(SourceLocation Loc) const {
756  assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
757  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
758  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
759
760  // If there are no #line directives in this file, just return the whole-file
761  // state.
762  if (!FI.hasLineDirectives())
763    return FI.getFileCharacteristic();
764
765  assert(LineTable && "Can't have linetable entries without a LineTable!");
766  // See if there is a #line directive before the location.
767  const LineEntry *Entry =
768    LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
769
770  // If this is before the first line marker, use the file characteristic.
771  if (!Entry)
772    return FI.getFileCharacteristic();
773
774  return Entry->FileKind;
775}
776
777/// Return the filename or buffer identifier of the buffer the location is in.
778/// Note that this name does not respect #line directives.  Use getPresumedLoc
779/// for normal clients.
780const char *SourceManager::getBufferName(SourceLocation Loc) const {
781  if (Loc.isInvalid()) return "<invalid loc>";
782
783  return getBuffer(getFileID(Loc))->getBufferIdentifier();
784}
785
786
787/// getPresumedLoc - This method returns the "presumed" location of a
788/// SourceLocation specifies.  A "presumed location" can be modified by #line
789/// or GNU line marker directives.  This provides a view on the data that a
790/// user should see in diagnostics, for example.
791///
792/// Note that a presumed location is always given as the instantiation point
793/// of an instantiation location, not at the spelling location.
794PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
795  if (Loc.isInvalid()) return PresumedLoc();
796
797  // Presumed locations are always for instantiation points.
798  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
799
800  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
801  const SrcMgr::ContentCache *C = FI.getContentCache();
802
803  // To get the source name, first consult the FileEntry (if one exists)
804  // before the MemBuffer as this will avoid unnecessarily paging in the
805  // MemBuffer.
806  const char *Filename =
807    C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
808  unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
809  unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
810  SourceLocation IncludeLoc = FI.getIncludeLoc();
811
812  // If we have #line directives in this file, update and overwrite the physical
813  // location info if appropriate.
814  if (FI.hasLineDirectives()) {
815    assert(LineTable && "Can't have linetable entries without a LineTable!");
816    // See if there is a #line directive before this.  If so, get it.
817    if (const LineEntry *Entry =
818          LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
819      // If the LineEntry indicates a filename, use it.
820      if (Entry->FilenameID != -1)
821        Filename = LineTable->getFilename(Entry->FilenameID);
822
823      // Use the line number specified by the LineEntry.  This line number may
824      // be multiple lines down from the line entry.  Add the difference in
825      // physical line numbers from the query point and the line marker to the
826      // total.
827      unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
828      LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
829
830      // Note that column numbers are not molested by line markers.
831
832      // Handle virtual #include manipulation.
833      if (Entry->IncludeOffset) {
834        IncludeLoc = getLocForStartOfFile(LocInfo.first);
835        IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset);
836      }
837    }
838  }
839
840  return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
841}
842
843//===----------------------------------------------------------------------===//
844// Other miscellaneous methods.
845//===----------------------------------------------------------------------===//
846
847
848/// PrintStats - Print statistics to stderr.
849///
850void SourceManager::PrintStats() const {
851  llvm::cerr << "\n*** Source Manager Stats:\n";
852  llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
853             << " mem buffers mapped.\n";
854  llvm::cerr << SLocEntryTable.size() << " SLocEntry's allocated, "
855             << NextOffset << "B of Sloc address space used.\n";
856
857  unsigned NumLineNumsComputed = 0;
858  unsigned NumFileBytesMapped = 0;
859  for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
860    NumLineNumsComputed += I->second->SourceLineCache != 0;
861    NumFileBytesMapped  += I->second->getSizeBytesMapped();
862  }
863
864  llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
865             << NumLineNumsComputed << " files with line #'s computed.\n";
866  llvm::cerr << "FileID scans: " << NumLinearScans << " linear, "
867             << NumBinaryProbes << " binary.\n";
868}
869
870//===----------------------------------------------------------------------===//
871// Serialization.
872//===----------------------------------------------------------------------===//
873
874void ContentCache::Emit(llvm::Serializer& S) const {
875  S.FlushRecord();
876  S.EmitPtr(this);
877
878  if (Entry) {
879    llvm::sys::Path Fname(Buffer->getBufferIdentifier());
880
881    if (Fname.isAbsolute())
882      S.EmitCStr(Fname.c_str());
883    else {
884      // Create an absolute path.
885      // FIXME: This will potentially contain ".." and "." in the path.
886      llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
887      path.appendComponent(Fname.c_str());
888      S.EmitCStr(path.c_str());
889    }
890  }
891  else {
892    const char* p = Buffer->getBufferStart();
893    const char* e = Buffer->getBufferEnd();
894
895    S.EmitInt(e-p);
896
897    for ( ; p != e; ++p)
898      S.EmitInt(*p);
899  }
900
901  S.FlushRecord();
902}
903
904void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
905                                       SourceManager& SMgr,
906                                       FileManager* FMgr,
907                                       std::vector<char>& Buf) {
908  if (FMgr) {
909    llvm::SerializedPtrID PtrID = D.ReadPtrID();
910    D.ReadCStr(Buf,false);
911
912    // Create/fetch the FileEntry.
913    const char* start = &Buf[0];
914    const FileEntry* E = FMgr->getFile(start,start+Buf.size());
915
916    // FIXME: Ideally we want a lazy materialization of the ContentCache
917    //  anyway, because we don't want to read in source files unless this
918    //  is absolutely needed.
919    if (!E)
920      D.RegisterPtr(PtrID,NULL);
921    else
922      // Get the ContextCache object and register it with the deserializer.
923      D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E));
924    return;
925  }
926
927  // Register the ContextCache object with the deserializer.
928  /* FIXME:
929  ContentCache *Entry
930  SMgr.MemBufferInfos.push_back(ContentCache());
931   = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
932  D.RegisterPtr(&Entry);
933
934  // Create the buffer.
935  unsigned Size = D.ReadInt();
936  Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
937
938  // Read the contents of the buffer.
939  char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
940  for (unsigned i = 0; i < Size ; ++i)
941    p[i] = D.ReadInt();
942   */
943}
944
945void SourceManager::Emit(llvm::Serializer& S) const {
946  S.EnterBlock();
947  S.EmitPtr(this);
948  S.EmitInt(MainFileID.getOpaqueValue());
949
950  // Emit: FileInfos.  Just emit the file name.
951  S.EnterBlock();
952
953  // FIXME: Emit FileInfos.
954  //std::for_each(FileInfos.begin(), FileInfos.end(),
955  //              S.MakeEmitter<ContentCache>());
956
957  S.ExitBlock();
958
959  // Emit: MemBufferInfos
960  S.EnterBlock();
961
962  /* FIXME: EMIT.
963  std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
964                S.MakeEmitter<ContentCache>());
965   */
966
967  S.ExitBlock();
968
969  // FIXME: Emit SLocEntryTable.
970
971  S.ExitBlock();
972}
973
974SourceManager*
975SourceManager::CreateAndRegister(llvm::Deserializer &D, FileManager &FMgr) {
976  SourceManager *M = new SourceManager();
977  D.RegisterPtr(M);
978
979  // Read: the FileID of the main source file of the translation unit.
980  M->MainFileID = FileID::get(D.ReadInt());
981
982  std::vector<char> Buf;
983
984  /*{ // FIXME Read: FileInfos.
985    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
986    while (!D.FinishedBlock(BLoc))
987    ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
988  }*/
989
990  /*{ // FIXME Read: MemBufferInfos.
991    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
992    while (!D.FinishedBlock(BLoc))
993    ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
994    }*/
995
996  // FIXME: Read SLocEntryTable.
997
998  return M;
999}
1000