SourceManager.cpp revision 721818304ac462d8c6ce05eecd02884033db78f1
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/System/Path.h"
19#include "llvm/Bitcode/Serialize.h"
20#include "llvm/Bitcode/Deserialize.h"
21#include "llvm/Support/Streams.h"
22#include <algorithm>
23using namespace clang;
24using namespace SrcMgr;
25using llvm::MemoryBuffer;
26
27ContentCache::~ContentCache() {
28  delete Buffer;
29  delete [] SourceLineCache;
30}
31
32/// getFileInfo - Create or return a cached FileInfo for the specified file.
33///
34const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) {
35
36  assert(FileEnt && "Didn't specify a file entry to use?");
37  // Do we already have information about this file?
38  std::set<ContentCache>::iterator I =
39    FileInfos.lower_bound(ContentCache(FileEnt));
40
41  if (I != FileInfos.end() && I->Entry == FileEnt)
42    return &*I;
43
44  // Nope, get information.
45  const MemoryBuffer *File =
46    MemoryBuffer::getFile(FileEnt->getName(), 0, FileEnt->getSize());
47  if (File == 0)
48    return 0;
49
50  ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt));
51
52  Entry.Buffer = File;
53  Entry.SourceLineCache = 0;
54  Entry.NumLines = 0;
55  return &Entry;
56}
57
58
59/// createMemBufferContentCache - Create a new ContentCache for the specified
60///  memory buffer.  This does no caching.
61const ContentCache*
62SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
63  // Add a new ContentCache to the MemBufferInfos list and return it.  We
64  // must default construct the object first that the instance actually
65  // stored within MemBufferInfos actually owns the Buffer, and not any
66  // temporary we would use in the call to "push_back".
67  MemBufferInfos.push_back(ContentCache());
68  ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back());
69  Entry.Buffer = Buffer;
70  return &Entry;
71}
72
73
74/// createFileID - Create a new fileID for the specified ContentCache and
75/// include position.  This works regardless of whether the ContentCache
76/// corresponds to a file or some other input source.
77unsigned SourceManager::createFileID(const ContentCache *File,
78                                     SourceLocation IncludePos,
79                                     unsigned DirCharacter) {
80  // If FileEnt is really large (e.g. it's a large .i file), we may not be able
81  // to fit an arbitrary position in the file in the FilePos field.  To handle
82  // this, we create one FileID for each chunk of the file that fits in a
83  // FilePos field.
84  unsigned FileSize = File->Buffer->getBufferSize();
85  if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
86    FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File, DirCharacter));
87    assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
88           "Ran out of file ID's!");
89    return FileIDs.size();
90  }
91
92  // Create one FileID for each chunk of the file.
93  unsigned Result = FileIDs.size()+1;
94
95  unsigned ChunkNo = 0;
96  while (1) {
97    FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File,
98                                      DirCharacter));
99
100    if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
101    FileSize -= (1 << SourceLocation::FilePosBits);
102  }
103
104  assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
105         "Ran out of file ID's!");
106  return Result;
107}
108
109/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
110/// that a token from physloc PhysLoc should actually be referenced from
111/// InstantiationLoc.
112SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
113                                                  SourceLocation InstantLoc) {
114  // The specified source location may be a mapped location, due to a macro
115  // instantiation or #line directive.  Strip off this information to find out
116  // where the characters are actually located.
117  PhysLoc = getPhysicalLoc(PhysLoc);
118
119  // Resolve InstantLoc down to a real logical location.
120  InstantLoc = getLogicalLoc(InstantLoc);
121
122
123  // If the last macro id is close to the currently requested location, try to
124  // reuse it.  This implements a small cache.
125  for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
126    MacroIDInfo &LastOne = MacroIDs[i];
127
128    // The instanitation point and source physloc have to exactly match to reuse
129    // (for now).  We could allow "nearby" instantiations in the future.
130    if (LastOne.getVirtualLoc() != InstantLoc ||
131        LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
132      continue;
133
134    // Check to see if the physloc of the token came from near enough to reuse.
135    int PhysDelta = PhysLoc.getRawFilePos() -
136                    LastOne.getPhysicalLoc().getRawFilePos();
137    if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
138      return SourceLocation::getMacroLoc(i, PhysDelta);
139  }
140
141
142  MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
143  return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0);
144}
145
146/// getBufferData - Return a pointer to the start and end of the character
147/// data for the specified FileID.
148std::pair<const char*, const char*>
149SourceManager::getBufferData(unsigned FileID) const {
150  const llvm::MemoryBuffer *Buf = getBuffer(FileID);
151  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
152}
153
154
155/// getCharacterData - Return a pointer to the start of the specified location
156/// in the appropriate MemoryBuffer.
157const char *SourceManager::getCharacterData(SourceLocation SL) const {
158  // Note that this is a hot function in the getSpelling() path, which is
159  // heavily used by -E mode.
160  SL = getPhysicalLoc(SL);
161
162  return getContentCache(SL.getFileID())->Buffer->getBufferStart() +
163         getFullFilePos(SL);
164}
165
166
167/// getColumnNumber - Return the column # for the specified file position.
168/// this is significantly cheaper to compute than the line number.  This returns
169/// zero if the column number isn't known.
170unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
171  unsigned FileID = Loc.getFileID();
172  if (FileID == 0) return 0;
173
174  unsigned FilePos = getFullFilePos(Loc);
175  const MemoryBuffer *Buffer = getBuffer(FileID);
176  const char *Buf = Buffer->getBufferStart();
177
178  unsigned LineStart = FilePos;
179  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
180    --LineStart;
181  return FilePos-LineStart+1;
182}
183
184/// getSourceName - This method returns the name of the file or buffer that
185/// the SourceLocation specifies.  This can be modified with #line directives,
186/// etc.
187const char *SourceManager::getSourceName(SourceLocation Loc) const {
188  unsigned FileID = Loc.getFileID();
189  if (FileID == 0) return "";
190  return getContentCache(FileID)->Buffer->getBufferIdentifier();
191}
192
193static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE;
194static void ComputeLineNumbers(ContentCache* FI) {
195  const MemoryBuffer *Buffer = FI->Buffer;
196
197  // Find the file offsets of all of the *physical* source lines.  This does
198  // not look at trigraphs, escaped newlines, or anything else tricky.
199  std::vector<unsigned> LineOffsets;
200
201  // Line #1 starts at char 0.
202  LineOffsets.push_back(0);
203
204  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
205  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
206  unsigned Offs = 0;
207  while (1) {
208    // Skip over the contents of the line.
209    // TODO: Vectorize this?  This is very performance sensitive for programs
210    // with lots of diagnostics and in -E mode.
211    const unsigned char *NextBuf = (const unsigned char *)Buf;
212    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
213      ++NextBuf;
214    Offs += NextBuf-Buf;
215    Buf = NextBuf;
216
217    if (Buf[0] == '\n' || Buf[0] == '\r') {
218      // If this is \n\r or \r\n, skip both characters.
219      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
220        ++Offs, ++Buf;
221      ++Offs, ++Buf;
222      LineOffsets.push_back(Offs);
223    } else {
224      // Otherwise, this is a null.  If end of file, exit.
225      if (Buf == End) break;
226      // Otherwise, skip the null.
227      ++Offs, ++Buf;
228    }
229  }
230
231  // Copy the offsets into the FileInfo structure.
232  FI->NumLines = LineOffsets.size();
233  FI->SourceLineCache = new unsigned[LineOffsets.size()];
234  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
235}
236
237/// getLineNumber - Given a SourceLocation, return the physical line number
238/// for the position indicated.  This requires building and caching a table of
239/// line offsets for the MemoryBuffer, so this is not cheap: use only when
240/// about to emit a diagnostic.
241unsigned SourceManager::getLineNumber(SourceLocation Loc) {
242  unsigned FileID = Loc.getFileID();
243  if (FileID == 0) return 0;
244
245  ContentCache* Content;
246
247  if (LastLineNoFileIDQuery == FileID)
248    Content = LastLineNoContentCache;
249  else
250    Content = const_cast<ContentCache*>(getContentCache(FileID));
251
252  // If this is the first use of line information for this buffer, compute the
253  /// SourceLineCache for it on demand.
254  if (Content->SourceLineCache == 0)
255    ComputeLineNumbers(Content);
256
257  // Okay, we know we have a line number table.  Do a binary search to find the
258  // line number that this character position lands on.
259  unsigned *SourceLineCache = Content->SourceLineCache;
260  unsigned *SourceLineCacheStart = SourceLineCache;
261  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
262
263  unsigned QueriedFilePos = getFullFilePos(Loc)+1;
264
265  // If the previous query was to the same file, we know both the file pos from
266  // that query and the line number returned.  This allows us to narrow the
267  // search space from the entire file to something near the match.
268  if (LastLineNoFileIDQuery == FileID) {
269    if (QueriedFilePos >= LastLineNoFilePos) {
270      SourceLineCache = SourceLineCache+LastLineNoResult-1;
271
272      // The query is likely to be nearby the previous one.  Here we check to
273      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
274      // where big comment blocks and vertical whitespace eat up lines but
275      // contribute no tokens.
276      if (SourceLineCache+5 < SourceLineCacheEnd) {
277        if (SourceLineCache[5] > QueriedFilePos)
278          SourceLineCacheEnd = SourceLineCache+5;
279        else if (SourceLineCache+10 < SourceLineCacheEnd) {
280          if (SourceLineCache[10] > QueriedFilePos)
281            SourceLineCacheEnd = SourceLineCache+10;
282          else if (SourceLineCache+20 < SourceLineCacheEnd) {
283            if (SourceLineCache[20] > QueriedFilePos)
284              SourceLineCacheEnd = SourceLineCache+20;
285          }
286        }
287      }
288    } else {
289      SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
290    }
291  }
292
293  // If the spread is large, do a "radix" test as our initial guess, based on
294  // the assumption that lines average to approximately the same length.
295  // NOTE: This is currently disabled, as it does not appear to be profitable in
296  // initial measurements.
297  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
298    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
299
300    // Take a stab at guessing where it is.
301    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
302
303    // Check for -10 and +10 lines.
304    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
305    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
306
307    // If the computed lower bound is less than the query location, move it in.
308    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
309        SourceLineCacheStart[LowerBound] < QueriedFilePos)
310      SourceLineCache = SourceLineCacheStart+LowerBound;
311
312    // If the computed upper bound is greater than the query location, move it.
313    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
314        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
315      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
316  }
317
318  unsigned *Pos
319    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
320  unsigned LineNo = Pos-SourceLineCacheStart;
321
322  LastLineNoFileIDQuery = FileID;
323  LastLineNoContentCache = Content;
324  LastLineNoFilePos = QueriedFilePos;
325  LastLineNoResult = LineNo;
326  return LineNo;
327}
328
329/// PrintStats - Print statistics to stderr.
330///
331void SourceManager::PrintStats() const {
332  llvm::cerr << "\n*** Source Manager Stats:\n";
333  llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
334             << " mem buffers mapped, " << FileIDs.size()
335             << " file ID's allocated.\n";
336  llvm::cerr << "  " << FileIDs.size() << " normal buffer FileID's, "
337             << MacroIDs.size() << " macro expansion FileID's.\n";
338
339  unsigned NumLineNumsComputed = 0;
340  unsigned NumFileBytesMapped = 0;
341  for (std::set<ContentCache>::const_iterator I =
342       FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
343    NumLineNumsComputed += I->SourceLineCache != 0;
344    NumFileBytesMapped  += I->Buffer->getBufferSize();
345  }
346
347  llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
348             << NumLineNumsComputed << " files with line #'s computed.\n";
349}
350
351//===----------------------------------------------------------------------===//
352// Serialization.
353//===----------------------------------------------------------------------===//
354
355void ContentCache::Emit(llvm::Serializer& S) const {
356  S.FlushRecord();
357  S.EmitPtr(this);
358
359  if (Entry) {
360    llvm::sys::Path Fname(Buffer->getBufferIdentifier());
361
362    if (Fname.isAbsolute())
363      S.EmitCStr(Fname.c_str());
364    else {
365      // Create an absolute path.
366      // FIXME: This will potentially contain ".." and "." in the path.
367      llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
368      path.appendComponent(Fname.c_str());
369      S.EmitCStr(path.c_str());
370    }
371  }
372  else {
373    const char* p = Buffer->getBufferStart();
374    const char* e = Buffer->getBufferEnd();
375
376    S.EmitInt(e-p);
377
378    for ( ; p != e; ++p)
379      S.EmitInt(*p);
380  }
381
382  S.FlushRecord();
383}
384
385void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
386                                       SourceManager& SMgr,
387                                       FileManager* FMgr,
388                                       std::vector<char>& Buf) {
389  if (FMgr) {
390    llvm::SerializedPtrID PtrID = D.ReadPtrID();
391    D.ReadCStr(Buf,false);
392
393    // Create/fetch the FileEntry.
394    const char* start = &Buf[0];
395    const FileEntry* E = FMgr->getFile(start,start+Buf.size());
396
397    // FIXME: Ideally we want a lazy materialization of the ContentCache
398    //  anyway, because we don't want to read in source files unless this
399    //  is absolutely needed.
400    if (!E)
401      D.RegisterPtr(PtrID,NULL);
402    else
403      // Get the ContextCache object and register it with the deserializer.
404      D.RegisterPtr(PtrID,SMgr.getContentCache(E));
405  }
406  else {
407    // Register the ContextCache object with the deserializer.
408    SMgr.MemBufferInfos.push_back(ContentCache());
409    ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
410    D.RegisterPtr(&Entry);
411
412    // Create the buffer.
413    unsigned Size = D.ReadInt();
414    Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
415
416    // Read the contents of the buffer.
417    char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
418    for (unsigned i = 0; i < Size ; ++i)
419      p[i] = D.ReadInt();
420  }
421}
422
423void FileIDInfo::Emit(llvm::Serializer& S) const {
424  S.Emit(IncludeLoc);
425  S.EmitInt(ChunkNo);
426  S.EmitPtr(Content);
427}
428
429FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) {
430  FileIDInfo I;
431  I.IncludeLoc = SourceLocation::ReadVal(D);
432  I.ChunkNo = D.ReadInt();
433  D.ReadPtr(I.Content,false);
434  return I;
435}
436
437void MacroIDInfo::Emit(llvm::Serializer& S) const {
438  S.Emit(VirtualLoc);
439  S.Emit(PhysicalLoc);
440}
441
442MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) {
443  MacroIDInfo I;
444  I.VirtualLoc = SourceLocation::ReadVal(D);
445  I.PhysicalLoc = SourceLocation::ReadVal(D);
446  return I;
447}
448
449void SourceManager::Emit(llvm::Serializer& S) const {
450  S.EnterBlock();
451  S.EmitPtr(this);
452  S.EmitInt(MainFileID);
453
454  // Emit: FileInfos.  Just emit the file name.
455  S.EnterBlock();
456
457  std::for_each(FileInfos.begin(),FileInfos.end(),
458                S.MakeEmitter<ContentCache>());
459
460  S.ExitBlock();
461
462  // Emit: MemBufferInfos
463  S.EnterBlock();
464
465  std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
466                S.MakeEmitter<ContentCache>());
467
468  S.ExitBlock();
469
470  // Emit: FileIDs
471  S.EmitInt(FileIDs.size());
472  std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>());
473
474  // Emit: MacroIDs
475  S.EmitInt(MacroIDs.size());
476  std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>());
477
478  S.ExitBlock();
479}
480
481SourceManager*
482SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){
483  SourceManager *M = new SourceManager();
484  D.RegisterPtr(M);
485
486  // Read: the FileID of the main source file of the translation unit.
487  M->MainFileID = D.ReadInt();
488
489  std::vector<char> Buf;
490
491  { // Read: FileInfos.
492    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
493    while (!D.FinishedBlock(BLoc))
494    ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
495  }
496
497  { // Read: MemBufferInfos.
498    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
499    while (!D.FinishedBlock(BLoc))
500    ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
501  }
502
503  // Read: FileIDs.
504  unsigned Size = D.ReadInt();
505  M->FileIDs.reserve(Size);
506  for (; Size > 0 ; --Size)
507    M->FileIDs.push_back(FileIDInfo::ReadVal(D));
508
509  // Read: MacroIDs.
510  Size = D.ReadInt();
511  M->MacroIDs.reserve(Size);
512  for (; Size > 0 ; --Size)
513    M->MacroIDs.push_back(MacroIDInfo::ReadVal(D));
514
515  return M;
516}
517