SourceManager.cpp revision 3c1f7b615c03e55f8aaee14a5793c917c050b373
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
16#include "llvm/Config/config.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/MemoryBuffer.h"
19#include "llvm/System/Path.h"
20#include "llvm/Bitcode/Serialize.h"
21#include "llvm/Bitcode/Deserialize.h"
22#include "llvm/Support/Streams.h"
23#include <algorithm>
24#include <fcntl.h>
25using namespace clang;
26using namespace SrcMgr;
27using llvm::MemoryBuffer;
28
29ContentCache::~ContentCache() {
30  delete Buffer;
31  delete [] SourceLineCache;
32}
33
34/// getFileInfo - Create or return a cached FileInfo for the specified file.
35///
36const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) {
37
38  assert(FileEnt && "Didn't specify a file entry to use?");
39  // Do we already have information about this file?
40  std::set<ContentCache>::iterator I =
41    FileInfos.lower_bound(ContentCache(FileEnt));
42
43  if (I != FileInfos.end() && I->Entry == FileEnt)
44    return &*I;
45
46  // Nope, get information.
47  const MemoryBuffer *File =
48    MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()), 0,
49                          FileEnt->getSize());
50  if (File == 0)
51    return 0;
52
53  ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt));
54
55  Entry.Buffer = File;
56  Entry.SourceLineCache = 0;
57  Entry.NumLines = 0;
58  return &Entry;
59}
60
61
62/// createMemBufferContentCache - Create a new ContentCache for the specified
63///  memory buffer.  This does no caching.
64const ContentCache*
65SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
66  // Add a new ContentCache to the MemBufferInfos list and return it.  We
67  // must default construct the object first that the instance actually
68  // stored within MemBufferInfos actually owns the Buffer, and not any
69  // temporary we would use in the call to "push_back".
70  MemBufferInfos.push_back(ContentCache());
71  ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back());
72  Entry.Buffer = Buffer;
73  return &Entry;
74}
75
76
77/// createFileID - Create a new fileID for the specified ContentCache and
78/// include position.  This works regardless of whether the ContentCache
79/// corresponds to a file or some other input source.
80unsigned SourceManager::createFileID(const ContentCache *File,
81                                     SourceLocation IncludePos) {
82  // If FileEnt is really large (e.g. it's a large .i file), we may not be able
83  // to fit an arbitrary position in the file in the FilePos field.  To handle
84  // this, we create one FileID for each chunk of the file that fits in a
85  // FilePos field.
86  unsigned FileSize = File->Buffer->getBufferSize();
87  if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
88    FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
89    assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
90           "Ran out of file ID's!");
91    return FileIDs.size();
92  }
93
94  // Create one FileID for each chunk of the file.
95  unsigned Result = FileIDs.size()+1;
96
97  unsigned ChunkNo = 0;
98  while (1) {
99    FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
100
101    if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
102    FileSize -= (1 << SourceLocation::FilePosBits);
103  }
104
105  assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
106         "Ran out of file ID's!");
107  return Result;
108}
109
110/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
111/// that a token from physloc PhysLoc should actually be referenced from
112/// InstantiationLoc.
113SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
114                                                  SourceLocation InstantLoc) {
115  // The specified source location may be a mapped location, due to a macro
116  // instantiation or #line directive.  Strip off this information to find out
117  // where the characters are actually located.
118  PhysLoc = getPhysicalLoc(PhysLoc);
119
120  // Resolve InstantLoc down to a real logical location.
121  InstantLoc = getLogicalLoc(InstantLoc);
122
123
124  // If the last macro id is close to the currently requested location, try to
125  // reuse it.  This implements a small cache.
126  for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
127    MacroIDInfo &LastOne = MacroIDs[i];
128
129    // The instanitation point and source physloc have to exactly match to reuse
130    // (for now).  We could allow "nearby" instantiations in the future.
131    if (LastOne.getVirtualLoc() != InstantLoc ||
132        LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
133      continue;
134
135    // Check to see if the physloc of the token came from near enough to reuse.
136    int PhysDelta = PhysLoc.getRawFilePos() -
137                    LastOne.getPhysicalLoc().getRawFilePos();
138    if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
139      return SourceLocation::getMacroLoc(i, PhysDelta);
140  }
141
142
143  MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
144  return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0);
145}
146
147/// getBufferData - Return a pointer to the start and end of the character
148/// data for the specified FileID.
149std::pair<const char*, const char*>
150SourceManager::getBufferData(unsigned FileID) const {
151  const llvm::MemoryBuffer *Buf = getBuffer(FileID);
152  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
153}
154
155
156/// getCharacterData - Return a pointer to the start of the specified location
157/// in the appropriate MemoryBuffer.
158const char *SourceManager::getCharacterData(SourceLocation SL) const {
159  // Note that this is a hot function in the getSpelling() path, which is
160  // heavily used by -E mode.
161  SL = getPhysicalLoc(SL);
162
163  return getContentCache(SL.getFileID())->Buffer->getBufferStart() +
164         getFullFilePos(SL);
165}
166
167
168/// getColumnNumber - Return the column # for the specified file position.
169/// this is significantly cheaper to compute than the line number.  This returns
170/// zero if the column number isn't known.
171unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
172  unsigned FileID = Loc.getFileID();
173  if (FileID == 0) return 0;
174
175  unsigned FilePos = getFullFilePos(Loc);
176  const MemoryBuffer *Buffer = getBuffer(FileID);
177  const char *Buf = Buffer->getBufferStart();
178
179  unsigned LineStart = FilePos;
180  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
181    --LineStart;
182  return FilePos-LineStart+1;
183}
184
185/// getSourceName - This method returns the name of the file or buffer that
186/// the SourceLocation specifies.  This can be modified with #line directives,
187/// etc.
188const char *SourceManager::getSourceName(SourceLocation Loc) const {
189  unsigned FileID = Loc.getFileID();
190  if (FileID == 0) return "";
191  return getContentCache(FileID)->Buffer->getBufferIdentifier();
192}
193
194static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE;
195static void ComputeLineNumbers(ContentCache* FI) {
196  const MemoryBuffer *Buffer = FI->Buffer;
197
198  // Find the file offsets of all of the *physical* source lines.  This does
199  // not look at trigraphs, escaped newlines, or anything else tricky.
200  std::vector<unsigned> LineOffsets;
201
202  // Line #1 starts at char 0.
203  LineOffsets.push_back(0);
204
205  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
206  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
207  unsigned Offs = 0;
208  while (1) {
209    // Skip over the contents of the line.
210    // TODO: Vectorize this?  This is very performance sensitive for programs
211    // with lots of diagnostics and in -E mode.
212    const unsigned char *NextBuf = (const unsigned char *)Buf;
213    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
214      ++NextBuf;
215    Offs += NextBuf-Buf;
216    Buf = NextBuf;
217
218    if (Buf[0] == '\n' || Buf[0] == '\r') {
219      // If this is \n\r or \r\n, skip both characters.
220      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
221        ++Offs, ++Buf;
222      ++Offs, ++Buf;
223      LineOffsets.push_back(Offs);
224    } else {
225      // Otherwise, this is a null.  If end of file, exit.
226      if (Buf == End) break;
227      // Otherwise, skip the null.
228      ++Offs, ++Buf;
229    }
230  }
231
232  // Copy the offsets into the FileInfo structure.
233  FI->NumLines = LineOffsets.size();
234  FI->SourceLineCache = new unsigned[LineOffsets.size()];
235  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
236}
237
238/// getLineNumber - Given a SourceLocation, return the physical line number
239/// for the position indicated.  This requires building and caching a table of
240/// line offsets for the MemoryBuffer, so this is not cheap: use only when
241/// about to emit a diagnostic.
242unsigned SourceManager::getLineNumber(SourceLocation Loc) {
243  unsigned FileID = Loc.getFileID();
244  if (FileID == 0) return 0;
245
246  ContentCache* Content;
247
248  if (LastLineNoFileIDQuery == FileID)
249    Content = LastLineNoContentCache;
250  else
251    Content = const_cast<ContentCache*>(getContentCache(FileID));
252
253  // If this is the first use of line information for this buffer, compute the
254  /// SourceLineCache for it on demand.
255  if (Content->SourceLineCache == 0)
256    ComputeLineNumbers(Content);
257
258  // Okay, we know we have a line number table.  Do a binary search to find the
259  // line number that this character position lands on.
260  unsigned *SourceLineCache = Content->SourceLineCache;
261  unsigned *SourceLineCacheStart = SourceLineCache;
262  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
263
264  unsigned QueriedFilePos = getFullFilePos(Loc)+1;
265
266  // If the previous query was to the same file, we know both the file pos from
267  // that query and the line number returned.  This allows us to narrow the
268  // search space from the entire file to something near the match.
269  if (LastLineNoFileIDQuery == FileID) {
270    if (QueriedFilePos >= LastLineNoFilePos) {
271      SourceLineCache = SourceLineCache+LastLineNoResult-1;
272
273      // The query is likely to be nearby the previous one.  Here we check to
274      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
275      // where big comment blocks and vertical whitespace eat up lines but
276      // contribute no tokens.
277      if (SourceLineCache+5 < SourceLineCacheEnd) {
278        if (SourceLineCache[5] > QueriedFilePos)
279          SourceLineCacheEnd = SourceLineCache+5;
280        else if (SourceLineCache+10 < SourceLineCacheEnd) {
281          if (SourceLineCache[10] > QueriedFilePos)
282            SourceLineCacheEnd = SourceLineCache+10;
283          else if (SourceLineCache+20 < SourceLineCacheEnd) {
284            if (SourceLineCache[20] > QueriedFilePos)
285              SourceLineCacheEnd = SourceLineCache+20;
286          }
287        }
288      }
289    } else {
290      SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
291    }
292  }
293
294  // If the spread is large, do a "radix" test as our initial guess, based on
295  // the assumption that lines average to approximately the same length.
296  // NOTE: This is currently disabled, as it does not appear to be profitable in
297  // initial measurements.
298  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
299    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
300
301    // Take a stab at guessing where it is.
302    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
303
304    // Check for -10 and +10 lines.
305    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
306    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
307
308    // If the computed lower bound is less than the query location, move it in.
309    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
310        SourceLineCacheStart[LowerBound] < QueriedFilePos)
311      SourceLineCache = SourceLineCacheStart+LowerBound;
312
313    // If the computed upper bound is greater than the query location, move it.
314    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
315        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
316      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
317  }
318
319  unsigned *Pos
320    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
321  unsigned LineNo = Pos-SourceLineCacheStart;
322
323  LastLineNoFileIDQuery = FileID;
324  LastLineNoContentCache = Content;
325  LastLineNoFilePos = QueriedFilePos;
326  LastLineNoResult = LineNo;
327  return LineNo;
328}
329
330/// PrintStats - Print statistics to stderr.
331///
332void SourceManager::PrintStats() const {
333  llvm::cerr << "\n*** Source Manager Stats:\n";
334  llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
335             << " mem buffers mapped, " << FileIDs.size()
336             << " file ID's allocated.\n";
337  llvm::cerr << "  " << FileIDs.size() << " normal buffer FileID's, "
338             << MacroIDs.size() << " macro expansion FileID's.\n";
339
340  unsigned NumLineNumsComputed = 0;
341  unsigned NumFileBytesMapped = 0;
342  for (std::set<ContentCache>::const_iterator I =
343       FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
344    NumLineNumsComputed += I->SourceLineCache != 0;
345    NumFileBytesMapped  += I->Buffer->getBufferSize();
346  }
347
348  llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
349             << NumLineNumsComputed << " files with line #'s computed.\n";
350}
351
352//===----------------------------------------------------------------------===//
353// Serialization.
354//===----------------------------------------------------------------------===//
355
356void ContentCache::Emit(llvm::Serializer& S) const {
357  S.FlushRecord();
358  S.EmitPtr(this);
359
360  if (Entry) {
361    llvm::sys::Path Fname(Buffer->getBufferIdentifier());
362
363    if (Fname.isAbsolute())
364      S.EmitCStr(Fname.c_str());
365    else {
366      // Create an absolute path.
367      // FIXME: This will potentially contain ".." and "." in the path.
368      llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
369      path.appendComponent(Fname.c_str());
370      S.EmitCStr(path.c_str());
371    }
372  }
373  else {
374    const char* p = Buffer->getBufferStart();
375    const char* e = Buffer->getBufferEnd();
376
377    S.EmitInt(e-p);
378
379    for ( ; p != e; ++p)
380      S.EmitInt(*p);
381  }
382
383  S.FlushRecord();
384}
385
386void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
387                                       SourceManager& SMgr,
388                                       FileManager* FMgr,
389                                       std::vector<char>& Buf) {
390  if (FMgr) {
391    llvm::SerializedPtrID PtrID = D.ReadPtrID();
392    D.ReadCStr(Buf,false);
393
394    // Create/fetch the FileEntry.
395    const char* start = &Buf[0];
396    const FileEntry* E = FMgr->getFile(start,start+Buf.size());
397
398    // FIXME: Ideally we want a lazy materialization of the ContentCache
399    //  anyway, because we don't want to read in source files unless this
400    //  is absolutely needed.
401    if (!E)
402      D.RegisterPtr(PtrID,NULL);
403    else
404      // Get the ContextCache object and register it with the deserializer.
405      D.RegisterPtr(PtrID,SMgr.getContentCache(E));
406  }
407  else {
408    // Register the ContextCache object with the deserializer.
409    SMgr.MemBufferInfos.push_back(ContentCache());
410    ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
411    D.RegisterPtr(&Entry);
412
413    // Create the buffer.
414    unsigned Size = D.ReadInt();
415    Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
416
417    // Read the contents of the buffer.
418    char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
419    for (unsigned i = 0; i < Size ; ++i)
420      p[i] = D.ReadInt();
421  }
422}
423
424void FileIDInfo::Emit(llvm::Serializer& S) const {
425  S.Emit(IncludeLoc);
426  S.EmitInt(ChunkNo);
427  S.EmitPtr(Content);
428}
429
430FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) {
431  FileIDInfo I;
432  I.IncludeLoc = SourceLocation::ReadVal(D);
433  I.ChunkNo = D.ReadInt();
434  D.ReadPtr(I.Content,false);
435  return I;
436}
437
438void MacroIDInfo::Emit(llvm::Serializer& S) const {
439  S.Emit(VirtualLoc);
440  S.Emit(PhysicalLoc);
441}
442
443MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) {
444  MacroIDInfo I;
445  I.VirtualLoc = SourceLocation::ReadVal(D);
446  I.PhysicalLoc = SourceLocation::ReadVal(D);
447  return I;
448}
449
450void SourceManager::Emit(llvm::Serializer& S) const {
451  S.EnterBlock();
452  S.EmitPtr(this);
453  S.EmitInt(MainFileID);
454
455  // Emit: FileInfos.  Just emit the file name.
456  S.EnterBlock();
457
458  std::for_each(FileInfos.begin(),FileInfos.end(),
459                S.MakeEmitter<ContentCache>());
460
461  S.ExitBlock();
462
463  // Emit: MemBufferInfos
464  S.EnterBlock();
465
466  std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
467                S.MakeEmitter<ContentCache>());
468
469  S.ExitBlock();
470
471  // Emit: FileIDs
472  S.EmitInt(FileIDs.size());
473  std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>());
474
475  // Emit: MacroIDs
476  S.EmitInt(MacroIDs.size());
477  std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>());
478
479  S.ExitBlock();
480}
481
482SourceManager*
483SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){
484  SourceManager *M = new SourceManager();
485  D.RegisterPtr(M);
486
487  // Read: the FileID of the main source file of the translation unit.
488  M->MainFileID = D.ReadInt();
489
490  std::vector<char> Buf;
491
492  { // Read: FileInfos.
493    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
494    while (!D.FinishedBlock(BLoc))
495    ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
496  }
497
498  { // Read: MemBufferInfos.
499    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
500    while (!D.FinishedBlock(BLoc))
501    ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
502  }
503
504  // Read: FileIDs.
505  unsigned Size = D.ReadInt();
506  M->FileIDs.reserve(Size);
507  for (; Size > 0 ; --Size)
508    M->FileIDs.push_back(FileIDInfo::ReadVal(D));
509
510  // Read: MacroIDs.
511  Size = D.ReadInt();
512  M->MacroIDs.reserve(Size);
513  for (; Size > 0 ; --Size)
514    M->MacroIDs.push_back(MacroIDInfo::ReadVal(D));
515
516  return M;
517}
518