SourceManager.cpp revision 6b3066780bda02e3117d71a18ca2f430ed1454af
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/System/Path.h"
19#include "llvm/Bitcode/Serialize.h"
20#include "llvm/Bitcode/Deserialize.h"
21#include "llvm/Support/Streams.h"
22#include <algorithm>
23using namespace clang;
24using namespace SrcMgr;
25using llvm::MemoryBuffer;
26
27//===----------------------------------------------------------------------===//
28// SourceManager Helper Classes
29//===----------------------------------------------------------------------===//
30
31ContentCache::~ContentCache() {
32  delete Buffer;
33}
34
35/// getSizeBytesMapped - Returns the number of bytes actually mapped for
36///  this ContentCache.  This can be 0 if the MemBuffer was not actually
37///  instantiated.
38unsigned ContentCache::getSizeBytesMapped() const {
39  return Buffer ? Buffer->getBufferSize() : 0;
40}
41
42/// getSize - Returns the size of the content encapsulated by this ContentCache.
43///  This can be the size of the source file or the size of an arbitrary
44///  scratch buffer.  If the ContentCache encapsulates a source file, that
45///  file is not lazily brought in from disk to satisfy this query.
46unsigned ContentCache::getSize() const {
47  return Entry ? Entry->getSize() : Buffer->getBufferSize();
48}
49
50const llvm::MemoryBuffer *ContentCache::getBuffer() const {
51  // Lazily create the Buffer for ContentCaches that wrap files.
52  if (!Buffer && Entry) {
53    // FIXME: Should we support a way to not have to do this check over
54    //   and over if we cannot open the file?
55    Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
56  }
57  return Buffer;
58}
59
60//===----------------------------------------------------------------------===//
61// Line Table Implementation
62//===----------------------------------------------------------------------===//
63
64namespace clang {
65struct LineEntry {
66  /// FileOffset - The offset in this file that the line entry occurs at.
67  unsigned FileOffset;
68
69  /// LineNo - The presumed line number of this line entry: #line 4.
70  unsigned LineNo;
71
72  /// FilenameID - The ID of the filename identified by this line entry:
73  /// #line 4 "foo.c".  This is -1 if not specified.
74  int FilenameID;
75
76  /// Flags - Set the 0 if no flags, 1 if a system header,
77  SrcMgr::CharacteristicKind FileKind;
78
79  static LineEntry get(unsigned Offs, unsigned Line, int Filename,
80                       SrcMgr::CharacteristicKind FileKind) {
81    LineEntry E;
82    E.FileOffset = Offs;
83    E.LineNo = Line;
84    E.FilenameID = Filename;
85    E.FileKind = FileKind;
86    return E;
87  }
88};
89
90inline bool operator<(const LineEntry &E, unsigned Offset) {
91  return E.FileOffset < Offset;
92}
93
94inline bool operator<(unsigned Offset, const LineEntry &E) {
95  return Offset < E.FileOffset;
96}
97
98/// LineTableInfo - This class is used to hold and unique data used to
99/// represent #line information.
100class LineTableInfo {
101  /// FilenameIDs - This map is used to assign unique IDs to filenames in
102  /// #line directives.  This allows us to unique the filenames that
103  /// frequently reoccur and reference them with indices.  FilenameIDs holds
104  /// the mapping from string -> ID, and FilenamesByID holds the mapping of ID
105  /// to string.
106  llvm::StringMap<unsigned, llvm::BumpPtrAllocator> FilenameIDs;
107  std::vector<llvm::StringMapEntry<unsigned>*> FilenamesByID;
108
109  /// LineEntries - This is a map from FileIDs to a list of line entries (sorted
110  /// by the offset they occur in the file.
111  std::map<unsigned, std::vector<LineEntry> > LineEntries;
112public:
113  LineTableInfo() {
114  }
115
116  void clear() {
117    FilenameIDs.clear();
118    FilenamesByID.clear();
119  }
120
121  ~LineTableInfo() {}
122
123  unsigned getLineTableFilenameID(const char *Ptr, unsigned Len);
124  const char *getFilename(unsigned ID) const {
125    assert(ID < FilenamesByID.size() && "Invalid FilenameID");
126    return FilenamesByID[ID]->getKeyData();
127  }
128
129  void AddLineNote(unsigned FID, unsigned Offset,
130                   unsigned LineNo, int FilenameID);
131  void AddLineNote(unsigned FID, unsigned Offset,
132                   unsigned LineNo, int FilenameID,
133                   unsigned EntryExit, SrcMgr::CharacteristicKind FileKind);
134
135
136  /// FindNearestLineEntry - Find the line entry nearest to FID that is before
137  /// it.  If there is no line entry before Offset in FID, return null.
138  const LineEntry *FindNearestLineEntry(unsigned FID, unsigned Offset);
139};
140} // namespace clang
141
142unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
143  // Look up the filename in the string table, returning the pre-existing value
144  // if it exists.
145  llvm::StringMapEntry<unsigned> &Entry =
146    FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
147  if (Entry.getValue() != ~0U)
148    return Entry.getValue();
149
150  // Otherwise, assign this the next available ID.
151  Entry.setValue(FilenamesByID.size());
152  FilenamesByID.push_back(&Entry);
153  return FilenamesByID.size()-1;
154}
155
156/// AddLineNote - Add a line note to the line table that indicates that there
157/// is a #line at the specified FID/Offset location which changes the presumed
158/// location to LineNo/FilenameID.
159void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
160                                unsigned LineNo, int FilenameID) {
161  std::vector<LineEntry> &Entries = LineEntries[FID];
162
163  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
164         "Adding line entries out of order!");
165
166  SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
167
168  if (!Entries.empty()) {
169    // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
170    // that we are still in "foo.h".
171    if (FilenameID == -1)
172      FilenameID = Entries.back().FilenameID;
173
174    // If we are after a line marker that switched us to system header mode,
175    // preserve it.
176    Kind = Entries.back().FileKind;
177  }
178
179  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind));
180}
181
182/// AddLineNote This is the same as the previous version of AddLineNote, but is
183/// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
184/// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
185/// this is a file exit.  FileKind specifies whether this is a system header or
186/// extern C system header.
187void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
188                                unsigned LineNo, int FilenameID,
189                                unsigned EntryExit,
190                                SrcMgr::CharacteristicKind FileKind) {
191  assert(FilenameID != -1 && "Unspecified filename should use other accessor");
192
193  std::vector<LineEntry> &Entries = LineEntries[FID];
194
195  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
196         "Adding line entries out of order!");
197
198
199  // TODO: Handle EntryExit.
200
201  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind));
202}
203
204
205/// FindNearestLineEntry - Find the line entry nearest to FID that is before
206/// it.  If there is no line entry before Offset in FID, return null.
207const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
208                                                     unsigned Offset) {
209  const std::vector<LineEntry> &Entries = LineEntries[FID];
210  assert(!Entries.empty() && "No #line entries for this FID after all!");
211
212  // It is very common for the query to be after the last #line, check this
213  // first.
214  if (Entries.back().FileOffset <= Offset)
215    return &Entries.back();
216
217  // Do a binary search to find the maximal element that is still before Offset.
218  std::vector<LineEntry>::const_iterator I =
219    std::upper_bound(Entries.begin(), Entries.end(), Offset);
220  if (I == Entries.begin()) return 0;
221  return &*--I;
222}
223
224
225/// getLineTableFilenameID - Return the uniqued ID for the specified filename.
226///
227unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
228  if (LineTable == 0)
229    LineTable = new LineTableInfo();
230  return LineTable->getLineTableFilenameID(Ptr, Len);
231}
232
233
234/// AddLineNote - Add a line note to the line table for the FileID and offset
235/// specified by Loc.  If FilenameID is -1, it is considered to be
236/// unspecified.
237void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
238                                int FilenameID) {
239  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
240
241  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
242
243  // Remember that this file has #line directives now if it doesn't already.
244  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
245
246  if (LineTable == 0)
247    LineTable = new LineTableInfo();
248  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
249}
250
251/// AddLineNote - Add a GNU line marker to the line table.
252void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
253                                int FilenameID, bool IsFileEntry,
254                                bool IsFileExit, bool IsSystemHeader,
255                                bool IsExternCHeader) {
256  // If there is no filename and no flags, this is treated just like a #line,
257  // which does not change the flags of the previous line marker.
258  if (FilenameID == -1) {
259    assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
260           "Can't set flags without setting the filename!");
261    return AddLineNote(Loc, LineNo, FilenameID);
262  }
263
264  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
265  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
266
267  // Remember that this file has #line directives now if it doesn't already.
268  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
269
270  if (LineTable == 0)
271    LineTable = new LineTableInfo();
272
273  SrcMgr::CharacteristicKind FileKind;
274  if (IsExternCHeader)
275    FileKind = SrcMgr::C_ExternCSystem;
276  else if (IsSystemHeader)
277    FileKind = SrcMgr::C_System;
278  else
279    FileKind = SrcMgr::C_User;
280
281  unsigned EntryExit = 0;
282  if (IsFileEntry)
283    EntryExit = 1;
284  else if (IsFileExit)
285    EntryExit = 2;
286
287  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
288                         EntryExit, FileKind);
289}
290
291
292//===----------------------------------------------------------------------===//
293// Private 'Create' methods.
294//===----------------------------------------------------------------------===//
295
296SourceManager::~SourceManager() {
297  delete LineTable;
298
299  // Delete FileEntry objects corresponding to content caches.  Since the actual
300  // content cache objects are bump pointer allocated, we just have to run the
301  // dtors, but we call the deallocate method for completeness.
302  for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
303    MemBufferInfos[i]->~ContentCache();
304    ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
305  }
306  for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
307       I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
308    I->second->~ContentCache();
309    ContentCacheAlloc.Deallocate(I->second);
310  }
311}
312
313void SourceManager::clearIDTables() {
314  MainFileID = FileID();
315  SLocEntryTable.clear();
316  LastLineNoFileIDQuery = FileID();
317  LastLineNoContentCache = 0;
318  LastFileIDLookup = FileID();
319
320  if (LineTable)
321    LineTable->clear();
322
323  // Use up FileID #0 as an invalid instantiation.
324  NextOffset = 0;
325  createInstantiationLoc(SourceLocation(), SourceLocation(), 1);
326}
327
328/// getOrCreateContentCache - Create or return a cached ContentCache for the
329/// specified file.
330const ContentCache *
331SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
332  assert(FileEnt && "Didn't specify a file entry to use?");
333
334  // Do we already have information about this file?
335  ContentCache *&Entry = FileInfos[FileEnt];
336  if (Entry) return Entry;
337
338  // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
339  // so that FileInfo can use the low 3 bits of the pointer for its own
340  // nefarious purposes.
341  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
342  EntryAlign = std::max(8U, EntryAlign);
343  Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
344  new (Entry) ContentCache(FileEnt);
345  return Entry;
346}
347
348
349/// createMemBufferContentCache - Create a new ContentCache for the specified
350///  memory buffer.  This does no caching.
351const ContentCache*
352SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
353  // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
354  // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
355  // the pointer for its own nefarious purposes.
356  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
357  EntryAlign = std::max(8U, EntryAlign);
358  ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
359  new (Entry) ContentCache();
360  MemBufferInfos.push_back(Entry);
361  Entry->setBuffer(Buffer);
362  return Entry;
363}
364
365//===----------------------------------------------------------------------===//
366// Methods to create new FileID's and instantiations.
367//===----------------------------------------------------------------------===//
368
369/// createFileID - Create a new fileID for the specified ContentCache and
370/// include position.  This works regardless of whether the ContentCache
371/// corresponds to a file or some other input source.
372FileID SourceManager::createFileID(const ContentCache *File,
373                                   SourceLocation IncludePos,
374                                   SrcMgr::CharacteristicKind FileCharacter) {
375  SLocEntryTable.push_back(SLocEntry::get(NextOffset,
376                                          FileInfo::get(IncludePos, File,
377                                                        FileCharacter)));
378  unsigned FileSize = File->getSize();
379  assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
380  NextOffset += FileSize+1;
381
382  // Set LastFileIDLookup to the newly created file.  The next getFileID call is
383  // almost guaranteed to be from that file.
384  return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1);
385}
386
387/// createInstantiationLoc - Return a new SourceLocation that encodes the fact
388/// that a token from SpellingLoc should actually be referenced from
389/// InstantiationLoc.
390SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
391                                                     SourceLocation InstantLoc,
392                                                     unsigned TokLength) {
393  SLocEntryTable.push_back(SLocEntry::get(NextOffset,
394                                          InstantiationInfo::get(InstantLoc,
395                                                                 SpellingLoc)));
396  assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
397  NextOffset += TokLength+1;
398  return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
399}
400
401/// getBufferData - Return a pointer to the start and end of the source buffer
402/// data for the specified FileID.
403std::pair<const char*, const char*>
404SourceManager::getBufferData(FileID FID) const {
405  const llvm::MemoryBuffer *Buf = getBuffer(FID);
406  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
407}
408
409
410//===----------------------------------------------------------------------===//
411// SourceLocation manipulation methods.
412//===----------------------------------------------------------------------===//
413
414/// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
415/// method that is used for all SourceManager queries that start with a
416/// SourceLocation object.  It is responsible for finding the entry in
417/// SLocEntryTable which contains the specified location.
418///
419FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
420  assert(SLocOffset && "Invalid FileID");
421
422  // After the first and second level caches, I see two common sorts of
423  // behavior: 1) a lot of searched FileID's are "near" the cached file location
424  // or are "near" the cached instantiation location.  2) others are just
425  // completely random and may be a very long way away.
426  //
427  // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
428  // then we fall back to a less cache efficient, but more scalable, binary
429  // search to find the location.
430
431  // See if this is near the file point - worst case we start scanning from the
432  // most newly created FileID.
433  std::vector<SrcMgr::SLocEntry>::const_iterator I;
434
435  if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
436    // Neither loc prunes our search.
437    I = SLocEntryTable.end();
438  } else {
439    // Perhaps it is near the file point.
440    I = SLocEntryTable.begin()+LastFileIDLookup.ID;
441  }
442
443  // Find the FileID that contains this.  "I" is an iterator that points to a
444  // FileID whose offset is known to be larger than SLocOffset.
445  unsigned NumProbes = 0;
446  while (1) {
447    --I;
448    if (I->getOffset() <= SLocOffset) {
449#if 0
450      printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
451             I-SLocEntryTable.begin(),
452             I->isInstantiation() ? "inst" : "file",
453             LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
454#endif
455      FileID Res = FileID::get(I-SLocEntryTable.begin());
456
457      // If this isn't an instantiation, remember it.  We have good locality
458      // across FileID lookups.
459      if (!I->isInstantiation())
460        LastFileIDLookup = Res;
461      NumLinearScans += NumProbes+1;
462      return Res;
463    }
464    if (++NumProbes == 8)
465      break;
466  }
467
468  // Convert "I" back into an index.  We know that it is an entry whose index is
469  // larger than the offset we are looking for.
470  unsigned GreaterIndex = I-SLocEntryTable.begin();
471  // LessIndex - This is the lower bound of the range that we're searching.
472  // We know that the offset corresponding to the FileID is is less than
473  // SLocOffset.
474  unsigned LessIndex = 0;
475  NumProbes = 0;
476  while (1) {
477    unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
478    unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset();
479
480    ++NumProbes;
481
482    // If the offset of the midpoint is too large, chop the high side of the
483    // range to the midpoint.
484    if (MidOffset > SLocOffset) {
485      GreaterIndex = MiddleIndex;
486      continue;
487    }
488
489    // If the middle index contains the value, succeed and return.
490    if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
491#if 0
492      printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
493             I-SLocEntryTable.begin(),
494             I->isInstantiation() ? "inst" : "file",
495             LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
496#endif
497      FileID Res = FileID::get(MiddleIndex);
498
499      // If this isn't an instantiation, remember it.  We have good locality
500      // across FileID lookups.
501      if (!I->isInstantiation())
502        LastFileIDLookup = Res;
503      NumBinaryProbes += NumProbes;
504      return Res;
505    }
506
507    // Otherwise, move the low-side up to the middle index.
508    LessIndex = MiddleIndex;
509  }
510}
511
512SourceLocation SourceManager::
513getInstantiationLocSlowCase(SourceLocation Loc) const {
514  do {
515    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
516    Loc =getSLocEntry(LocInfo.first).getInstantiation().getInstantiationLoc();
517    Loc = Loc.getFileLocWithOffset(LocInfo.second);
518  } while (!Loc.isFileID());
519
520  return Loc;
521}
522
523SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
524  do {
525    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
526    Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
527    Loc = Loc.getFileLocWithOffset(LocInfo.second);
528  } while (!Loc.isFileID());
529  return Loc;
530}
531
532
533std::pair<FileID, unsigned>
534SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
535                                                     unsigned Offset) const {
536  // If this is an instantiation record, walk through all the instantiation
537  // points.
538  FileID FID;
539  SourceLocation Loc;
540  do {
541    Loc = E->getInstantiation().getInstantiationLoc();
542
543    FID = getFileID(Loc);
544    E = &getSLocEntry(FID);
545    Offset += Loc.getOffset()-E->getOffset();
546  } while (!Loc.isFileID());
547
548  return std::make_pair(FID, Offset);
549}
550
551std::pair<FileID, unsigned>
552SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
553                                                unsigned Offset) const {
554  // If this is an instantiation record, walk through all the instantiation
555  // points.
556  FileID FID;
557  SourceLocation Loc;
558  do {
559    Loc = E->getInstantiation().getSpellingLoc();
560
561    FID = getFileID(Loc);
562    E = &getSLocEntry(FID);
563    Offset += Loc.getOffset()-E->getOffset();
564  } while (!Loc.isFileID());
565
566  return std::make_pair(FID, Offset);
567}
568
569
570//===----------------------------------------------------------------------===//
571// Queries about the code at a SourceLocation.
572//===----------------------------------------------------------------------===//
573
574/// getCharacterData - Return a pointer to the start of the specified location
575/// in the appropriate MemoryBuffer.
576const char *SourceManager::getCharacterData(SourceLocation SL) const {
577  // Note that this is a hot function in the getSpelling() path, which is
578  // heavily used by -E mode.
579  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
580
581  // Note that calling 'getBuffer()' may lazily page in a source file.
582  return getSLocEntry(LocInfo.first).getFile().getContentCache()
583              ->getBuffer()->getBufferStart() + LocInfo.second;
584}
585
586
587/// getColumnNumber - Return the column # for the specified file position.
588/// this is significantly cheaper to compute than the line number.
589unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
590  const char *Buf = getBuffer(FID)->getBufferStart();
591
592  unsigned LineStart = FilePos;
593  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
594    --LineStart;
595  return FilePos-LineStart+1;
596}
597
598unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
599  if (Loc.isInvalid()) return 0;
600  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
601  return getColumnNumber(LocInfo.first, LocInfo.second);
602}
603
604unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
605  if (Loc.isInvalid()) return 0;
606  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
607  return getColumnNumber(LocInfo.first, LocInfo.second);
608}
609
610
611
612static void ComputeLineNumbers(ContentCache* FI,
613                               llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE;
614static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
615  // Note that calling 'getBuffer()' may lazily page in the file.
616  const MemoryBuffer *Buffer = FI->getBuffer();
617
618  // Find the file offsets of all of the *physical* source lines.  This does
619  // not look at trigraphs, escaped newlines, or anything else tricky.
620  std::vector<unsigned> LineOffsets;
621
622  // Line #1 starts at char 0.
623  LineOffsets.push_back(0);
624
625  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
626  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
627  unsigned Offs = 0;
628  while (1) {
629    // Skip over the contents of the line.
630    // TODO: Vectorize this?  This is very performance sensitive for programs
631    // with lots of diagnostics and in -E mode.
632    const unsigned char *NextBuf = (const unsigned char *)Buf;
633    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
634      ++NextBuf;
635    Offs += NextBuf-Buf;
636    Buf = NextBuf;
637
638    if (Buf[0] == '\n' || Buf[0] == '\r') {
639      // If this is \n\r or \r\n, skip both characters.
640      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
641        ++Offs, ++Buf;
642      ++Offs, ++Buf;
643      LineOffsets.push_back(Offs);
644    } else {
645      // Otherwise, this is a null.  If end of file, exit.
646      if (Buf == End) break;
647      // Otherwise, skip the null.
648      ++Offs, ++Buf;
649    }
650  }
651
652  // Copy the offsets into the FileInfo structure.
653  FI->NumLines = LineOffsets.size();
654  FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
655  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
656}
657
658/// getLineNumber - Given a SourceLocation, return the spelling line number
659/// for the position indicated.  This requires building and caching a table of
660/// line offsets for the MemoryBuffer, so this is not cheap: use only when
661/// about to emit a diagnostic.
662unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
663  ContentCache *Content;
664  if (LastLineNoFileIDQuery == FID)
665    Content = LastLineNoContentCache;
666  else
667    Content = const_cast<ContentCache*>(getSLocEntry(FID)
668                                        .getFile().getContentCache());
669
670  // If this is the first use of line information for this buffer, compute the
671  /// SourceLineCache for it on demand.
672  if (Content->SourceLineCache == 0)
673    ComputeLineNumbers(Content, ContentCacheAlloc);
674
675  // Okay, we know we have a line number table.  Do a binary search to find the
676  // line number that this character position lands on.
677  unsigned *SourceLineCache = Content->SourceLineCache;
678  unsigned *SourceLineCacheStart = SourceLineCache;
679  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
680
681  unsigned QueriedFilePos = FilePos+1;
682
683  // If the previous query was to the same file, we know both the file pos from
684  // that query and the line number returned.  This allows us to narrow the
685  // search space from the entire file to something near the match.
686  if (LastLineNoFileIDQuery == FID) {
687    if (QueriedFilePos >= LastLineNoFilePos) {
688      SourceLineCache = SourceLineCache+LastLineNoResult-1;
689
690      // The query is likely to be nearby the previous one.  Here we check to
691      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
692      // where big comment blocks and vertical whitespace eat up lines but
693      // contribute no tokens.
694      if (SourceLineCache+5 < SourceLineCacheEnd) {
695        if (SourceLineCache[5] > QueriedFilePos)
696          SourceLineCacheEnd = SourceLineCache+5;
697        else if (SourceLineCache+10 < SourceLineCacheEnd) {
698          if (SourceLineCache[10] > QueriedFilePos)
699            SourceLineCacheEnd = SourceLineCache+10;
700          else if (SourceLineCache+20 < SourceLineCacheEnd) {
701            if (SourceLineCache[20] > QueriedFilePos)
702              SourceLineCacheEnd = SourceLineCache+20;
703          }
704        }
705      }
706    } else {
707      SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
708    }
709  }
710
711  // If the spread is large, do a "radix" test as our initial guess, based on
712  // the assumption that lines average to approximately the same length.
713  // NOTE: This is currently disabled, as it does not appear to be profitable in
714  // initial measurements.
715  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
716    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
717
718    // Take a stab at guessing where it is.
719    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
720
721    // Check for -10 and +10 lines.
722    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
723    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
724
725    // If the computed lower bound is less than the query location, move it in.
726    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
727        SourceLineCacheStart[LowerBound] < QueriedFilePos)
728      SourceLineCache = SourceLineCacheStart+LowerBound;
729
730    // If the computed upper bound is greater than the query location, move it.
731    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
732        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
733      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
734  }
735
736  unsigned *Pos
737    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
738  unsigned LineNo = Pos-SourceLineCacheStart;
739
740  LastLineNoFileIDQuery = FID;
741  LastLineNoContentCache = Content;
742  LastLineNoFilePos = QueriedFilePos;
743  LastLineNoResult = LineNo;
744  return LineNo;
745}
746
747unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
748  if (Loc.isInvalid()) return 0;
749  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
750  return getLineNumber(LocInfo.first, LocInfo.second);
751}
752unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
753  if (Loc.isInvalid()) return 0;
754  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
755  return getLineNumber(LocInfo.first, LocInfo.second);
756}
757
758/// getFileCharacteristic - return the file characteristic of the specified
759/// source location, indicating whether this is a normal file, a system
760/// header, or an "implicit extern C" system header.
761///
762/// This state can be modified with flags on GNU linemarker directives like:
763///   # 4 "foo.h" 3
764/// which changes all source locations in the current file after that to be
765/// considered to be from a system header.
766SrcMgr::CharacteristicKind
767SourceManager::getFileCharacteristic(SourceLocation Loc) const {
768  assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
769  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
770  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
771
772  // If there are no #line directives in this file, just return the whole-file
773  // state.
774  if (!FI.hasLineDirectives())
775    return FI.getFileCharacteristic();
776
777  assert(LineTable && "Can't have linetable entries without a LineTable!");
778  // See if there is a #line directive before the location.
779  const LineEntry *Entry =
780    LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
781
782  // If this is before the first line marker, use the file characteristic.
783  if (!Entry)
784    return FI.getFileCharacteristic();
785
786  return Entry->FileKind;
787}
788
789
790/// getPresumedLoc - This method returns the "presumed" location of a
791/// SourceLocation specifies.  A "presumed location" can be modified by #line
792/// or GNU line marker directives.  This provides a view on the data that a
793/// user should see in diagnostics, for example.
794///
795/// Note that a presumed location is always given as the instantiation point
796/// of an instantiation location, not at the spelling location.
797PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
798  if (Loc.isInvalid()) return PresumedLoc();
799
800  // Presumed locations are always for instantiation points.
801  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
802
803  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
804  const SrcMgr::ContentCache *C = FI.getContentCache();
805
806  // To get the source name, first consult the FileEntry (if one exists)
807  // before the MemBuffer as this will avoid unnecessarily paging in the
808  // MemBuffer.
809  const char *Filename =
810    C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
811  unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
812  unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
813  SourceLocation IncludeLoc = FI.getIncludeLoc();
814
815  // If we have #line directives in this file, update and overwrite the physical
816  // location info if appropriate.
817  if (FI.hasLineDirectives()) {
818    assert(LineTable && "Can't have linetable entries without a LineTable!");
819    // See if there is a #line directive before this.  If so, get it.
820    if (const LineEntry *Entry =
821          LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
822      // If the LineEntry indicates a filename, use it.
823      if (Entry->FilenameID != -1)
824        Filename = LineTable->getFilename(Entry->FilenameID);
825
826      // Use the line number specified by the LineEntry.  This line number may
827      // be multiple lines down from the line entry.  Add the difference in
828      // physical line numbers from the query point and the line marker to the
829      // total.
830      unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
831      LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
832
833      // Note that column numbers are not molested by line markers.
834    }
835  }
836
837  return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
838}
839
840//===----------------------------------------------------------------------===//
841// Other miscellaneous methods.
842//===----------------------------------------------------------------------===//
843
844
845/// PrintStats - Print statistics to stderr.
846///
847void SourceManager::PrintStats() const {
848  llvm::cerr << "\n*** Source Manager Stats:\n";
849  llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
850             << " mem buffers mapped.\n";
851  llvm::cerr << SLocEntryTable.size() << " SLocEntry's allocated, "
852             << NextOffset << "B of Sloc address space used.\n";
853
854  unsigned NumLineNumsComputed = 0;
855  unsigned NumFileBytesMapped = 0;
856  for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
857    NumLineNumsComputed += I->second->SourceLineCache != 0;
858    NumFileBytesMapped  += I->second->getSizeBytesMapped();
859  }
860
861  llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
862             << NumLineNumsComputed << " files with line #'s computed.\n";
863  llvm::cerr << "FileID scans: " << NumLinearScans << " linear, "
864             << NumBinaryProbes << " binary.\n";
865}
866
867//===----------------------------------------------------------------------===//
868// Serialization.
869//===----------------------------------------------------------------------===//
870
871void ContentCache::Emit(llvm::Serializer& S) const {
872  S.FlushRecord();
873  S.EmitPtr(this);
874
875  if (Entry) {
876    llvm::sys::Path Fname(Buffer->getBufferIdentifier());
877
878    if (Fname.isAbsolute())
879      S.EmitCStr(Fname.c_str());
880    else {
881      // Create an absolute path.
882      // FIXME: This will potentially contain ".." and "." in the path.
883      llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
884      path.appendComponent(Fname.c_str());
885      S.EmitCStr(path.c_str());
886    }
887  }
888  else {
889    const char* p = Buffer->getBufferStart();
890    const char* e = Buffer->getBufferEnd();
891
892    S.EmitInt(e-p);
893
894    for ( ; p != e; ++p)
895      S.EmitInt(*p);
896  }
897
898  S.FlushRecord();
899}
900
901void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
902                                       SourceManager& SMgr,
903                                       FileManager* FMgr,
904                                       std::vector<char>& Buf) {
905  if (FMgr) {
906    llvm::SerializedPtrID PtrID = D.ReadPtrID();
907    D.ReadCStr(Buf,false);
908
909    // Create/fetch the FileEntry.
910    const char* start = &Buf[0];
911    const FileEntry* E = FMgr->getFile(start,start+Buf.size());
912
913    // FIXME: Ideally we want a lazy materialization of the ContentCache
914    //  anyway, because we don't want to read in source files unless this
915    //  is absolutely needed.
916    if (!E)
917      D.RegisterPtr(PtrID,NULL);
918    else
919      // Get the ContextCache object and register it with the deserializer.
920      D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E));
921    return;
922  }
923
924  // Register the ContextCache object with the deserializer.
925  /* FIXME:
926  ContentCache *Entry
927  SMgr.MemBufferInfos.push_back(ContentCache());
928   = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
929  D.RegisterPtr(&Entry);
930
931  // Create the buffer.
932  unsigned Size = D.ReadInt();
933  Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
934
935  // Read the contents of the buffer.
936  char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
937  for (unsigned i = 0; i < Size ; ++i)
938    p[i] = D.ReadInt();
939   */
940}
941
942void SourceManager::Emit(llvm::Serializer& S) const {
943  S.EnterBlock();
944  S.EmitPtr(this);
945  S.EmitInt(MainFileID.getOpaqueValue());
946
947  // Emit: FileInfos.  Just emit the file name.
948  S.EnterBlock();
949
950  // FIXME: Emit FileInfos.
951  //std::for_each(FileInfos.begin(), FileInfos.end(),
952  //              S.MakeEmitter<ContentCache>());
953
954  S.ExitBlock();
955
956  // Emit: MemBufferInfos
957  S.EnterBlock();
958
959  /* FIXME: EMIT.
960  std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
961                S.MakeEmitter<ContentCache>());
962   */
963
964  S.ExitBlock();
965
966  // FIXME: Emit SLocEntryTable.
967
968  S.ExitBlock();
969}
970
971SourceManager*
972SourceManager::CreateAndRegister(llvm::Deserializer &D, FileManager &FMgr) {
973  SourceManager *M = new SourceManager();
974  D.RegisterPtr(M);
975
976  // Read: the FileID of the main source file of the translation unit.
977  M->MainFileID = FileID::get(D.ReadInt());
978
979  std::vector<char> Buf;
980
981  /*{ // FIXME Read: FileInfos.
982    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
983    while (!D.FinishedBlock(BLoc))
984    ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
985  }*/
986
987  { // Read: MemBufferInfos.
988    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
989    while (!D.FinishedBlock(BLoc))
990    ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
991  }
992
993  // FIXME: Read SLocEntryTable.
994
995  return M;
996}
997