ASTUnit.h revision 03013fa9a0bf1ef4b907f5fec006c8f4000fdd21
1//===--- ASTUnit.h - ASTUnit utility ----------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// ASTUnit utility class.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_FRONTEND_ASTUNIT_H
15#define LLVM_CLANG_FRONTEND_ASTUNIT_H
16
17#include "clang/Index/ASTLocation.h"
18#include "clang/Serialization/ASTBitCodes.h"
19#include "clang/Sema/Sema.h"
20#include "clang/Sema/CodeCompleteConsumer.h"
21#include "clang/Lex/PreprocessingRecord.h"
22#include "clang/Basic/SourceManager.h"
23#include "clang/Basic/FileManager.h"
24#include "clang/Basic/FileSystemOptions.h"
25#include "clang-c/Index.h"
26#include "llvm/ADT/IntrusiveRefCntPtr.h"
27#include "llvm/ADT/OwningPtr.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringMap.h"
30#include "llvm/Support/Path.h"
31#include <map>
32#include <string>
33#include <vector>
34#include <cassert>
35#include <utility>
36#include <sys/types.h>
37
38namespace llvm {
39  class MemoryBuffer;
40}
41
42namespace clang {
43class ASTContext;
44class CodeCompleteConsumer;
45class CompilerInvocation;
46class Decl;
47class Diagnostic;
48class FileEntry;
49class FileManager;
50class HeaderSearch;
51class Preprocessor;
52class SourceManager;
53class TargetInfo;
54
55using namespace idx;
56
57/// \brief Utility class for loading an ASTContext from an AST file.
58///
59class ASTUnit {
60public:
61  typedef std::map<FileID, std::vector<PreprocessedEntity *> >
62    PreprocessedEntitiesByFileMap;
63
64private:
65  llvm::IntrusiveRefCntPtr<Diagnostic> Diagnostics;
66  llvm::OwningPtr<FileManager>      FileMgr;
67  llvm::OwningPtr<SourceManager>    SourceMgr;
68  llvm::OwningPtr<HeaderSearch>     HeaderInfo;
69  llvm::OwningPtr<TargetInfo>       Target;
70  llvm::OwningPtr<Preprocessor>     PP;
71  llvm::OwningPtr<ASTContext>       Ctx;
72
73  FileSystemOptions FileSystemOpts;
74
75  /// \brief The AST consumer that received information about the translation
76  /// unit as it was parsed or loaded.
77  llvm::OwningPtr<ASTConsumer> Consumer;
78
79  /// \brief The semantic analysis object used to type-check the translation
80  /// unit.
81  llvm::OwningPtr<Sema> TheSema;
82
83  /// Optional owned invocation, just used to make the invocation used in
84  /// LoadFromCommandLine available.
85  llvm::OwningPtr<CompilerInvocation> Invocation;
86
87  // OnlyLocalDecls - when true, walking this AST should only visit declarations
88  // that come from the AST itself, not from included precompiled headers.
89  // FIXME: This is temporary; eventually, CIndex will always do this.
90  bool                              OnlyLocalDecls;
91
92  /// \brief Whether to capture any diagnostics produced.
93  bool CaptureDiagnostics;
94
95  /// \brief Track whether the main file was loaded from an AST or not.
96  bool MainFileIsAST;
97
98  /// \brief Whether this AST represents a complete translation unit.
99  bool CompleteTranslationUnit;
100
101  /// \brief Whether we should time each operation.
102  bool WantTiming;
103
104  /// Track the top-level decls which appeared in an ASTUnit which was loaded
105  /// from a source file.
106  //
107  // FIXME: This is just an optimization hack to avoid deserializing large parts
108  // of a PCH file when using the Index library on an ASTUnit loaded from
109  // source. In the long term we should make the Index library use efficient and
110  // more scalable search mechanisms.
111  std::vector<Decl*> TopLevelDecls;
112
113  /// The name of the original source file used to generate this ASTUnit.
114  std::string OriginalSourceFile;
115
116  // Critical optimization when using clang_getCursor().
117  ASTLocation LastLoc;
118
119  /// \brief The set of diagnostics produced when creating this
120  /// translation unit.
121  llvm::SmallVector<StoredDiagnostic, 4> StoredDiagnostics;
122
123  /// \brief The number of stored diagnostics that come from the driver
124  /// itself.
125  ///
126  /// Diagnostics that come from the driver are retained from one parse to
127  /// the next.
128  unsigned NumStoredDiagnosticsFromDriver;
129
130  /// \brief Temporary files that should be removed when the ASTUnit is
131  /// destroyed.
132  llvm::SmallVector<llvm::sys::Path, 4> TemporaryFiles;
133
134  /// \brief A mapping from file IDs to the set of preprocessed entities
135  /// stored in that file.
136  ///
137  /// FIXME: This is just an optimization hack to avoid searching through
138  /// many preprocessed entities during cursor traversal in the CIndex library.
139  /// Ideally, we would just be able to perform a binary search within the
140  /// list of preprocessed entities.
141  PreprocessedEntitiesByFileMap PreprocessedEntitiesByFile;
142
143  /// \brief Simple hack to allow us to assert that ASTUnit is not being
144  /// used concurrently, which is not supported.
145  ///
146  /// Clients should create instances of the ConcurrencyCheck class whenever
147  /// using the ASTUnit in a way that isn't intended to be concurrent, which is
148  /// just about any usage.
149  unsigned int ConcurrencyCheckValue;
150  static const unsigned int CheckLocked = 28573289;
151  static const unsigned int CheckUnlocked = 9803453;
152
153  /// \brief Counter that determines when we want to try building a
154  /// precompiled preamble.
155  ///
156  /// If zero, we will never build a precompiled preamble. Otherwise,
157  /// it's treated as a counter that decrements each time we reparse
158  /// without the benefit of a precompiled preamble. When it hits 1,
159  /// we'll attempt to rebuild the precompiled header. This way, if
160  /// building the precompiled preamble fails, we won't try again for
161  /// some number of calls.
162  unsigned PreambleRebuildCounter;
163
164  /// \brief The file in which the precompiled preamble is stored.
165  std::string PreambleFile;
166
167  /// \brief The contents of the preamble that has been precompiled to
168  /// \c PreambleFile.
169  std::vector<char> Preamble;
170
171  /// \brief Whether the preamble ends at the start of a new line.
172  ///
173  /// Used to inform the lexer as to whether it's starting at the beginning of
174  /// a line after skipping the preamble.
175  bool PreambleEndsAtStartOfLine;
176
177  /// \brief The size of the source buffer that we've reserved for the main
178  /// file within the precompiled preamble.
179  unsigned PreambleReservedSize;
180
181  /// \brief Keeps track of the files that were used when computing the
182  /// preamble, with both their buffer size and their modification time.
183  ///
184  /// If any of the files have changed from one compile to the next,
185  /// the preamble must be thrown away.
186  llvm::StringMap<std::pair<off_t, time_t> > FilesInPreamble;
187
188  /// \brief When non-NULL, this is the buffer used to store the contents of
189  /// the main file when it has been padded for use with the precompiled
190  /// preamble.
191  llvm::MemoryBuffer *SavedMainFileBuffer;
192
193  /// \brief When non-NULL, this is the buffer used to store the
194  /// contents of the preamble when it has been padded to build the
195  /// precompiled preamble.
196  llvm::MemoryBuffer *PreambleBuffer;
197
198  /// \brief The number of warnings that occurred while parsing the preamble.
199  ///
200  /// This value will be used to restore the state of the \c Diagnostic object
201  /// when re-using the precompiled preamble. Note that only the
202  /// number of warnings matters, since we will not save the preamble
203  /// when any errors are present.
204  unsigned NumWarningsInPreamble;
205
206  /// \brief The number of diagnostics that were stored when parsing
207  /// the precompiled preamble.
208  ///
209  /// This value is used to determine how many of the stored
210  /// diagnostics should be retained when reparsing in the presence of
211  /// a precompiled preamble.
212  unsigned NumStoredDiagnosticsInPreamble;
213
214  /// \brief A list of the serialization ID numbers for each of the top-level
215  /// declarations parsed within the precompiled preamble.
216  std::vector<serialization::DeclID> TopLevelDeclsInPreamble;
217
218  /// \brief Whether we should be caching code-completion results.
219  bool ShouldCacheCodeCompletionResults;
220
221  static void ConfigureDiags(llvm::IntrusiveRefCntPtr<Diagnostic> &Diags,
222                             ASTUnit &AST, bool CaptureDiagnostics);
223
224public:
225  /// \brief A cached code-completion result, which may be introduced in one of
226  /// many different contexts.
  ///
  /// This is a plain aggregate: no constructor is declared here, so every
  /// member must be filled in by whoever creates the entry.
227  struct CachedCodeCompletionResult {
228    /// \brief The code-completion string corresponding to this completion
229    /// result.
230    CodeCompletionString *Completion;
231
232    /// \brief A bitmask that indicates which code-completion contexts should
233    /// contain this completion result.
234    ///
235    /// The bits in the bitmask correspond to the values of
236    /// CodeCompleteContext::Kind. To map from a completion context kind to a
237    /// bit, subtract one from the completion context kind and shift 1 by that
238    /// number of bits. Many completions can occur in several different
239    /// contexts.
240    unsigned ShowInContexts;
241
242    /// \brief The priority given to this code-completion result.
243    unsigned Priority;
244
245    /// \brief The libclang cursor kind corresponding to this code-completion
246    /// result.
247    CXCursorKind Kind;
248
249    /// \brief The availability of this code-completion result.
250    CXAvailabilityKind Availability;
251
252    /// \brief The simplified type class for a non-macro completion result.
253    SimplifiedTypeClass TypeClass;
254
255    /// \brief The type of a non-macro completion result, stored as a unique
256    /// integer used by the string map of cached completion types.
257    ///
258    /// This value will be zero if the type is not known, or a unique value
259    /// determined by the formatted type string. See \c CachedCompletionTypes
260    /// for more information.
261    unsigned Type;
262  };
263
264  /// \brief Retrieve the mapping from formatted type names to unique type
265  /// identifiers.
266  llvm::StringMap<unsigned> &getCachedCompletionTypes() {
267    return CachedCompletionTypes;
268  }
269
270private:
271  /// \brief The set of cached code-completion results.
272  std::vector<CachedCodeCompletionResult> CachedCompletionResults;
273
274  /// \brief A mapping from the formatted type name to a unique number for that
275  /// type, which is used for type equality comparisons.
276  llvm::StringMap<unsigned> CachedCompletionTypes;
277
278  /// \brief The number of top-level declarations present the last time we
279  /// cached code-completion results.
280  ///
281  /// The value is used to help detect when we should repopulate the global
282  /// completion cache.
283  unsigned NumTopLevelDeclsAtLastCompletionCache;
284
285  /// \brief The number of reparses left until we'll consider updating the
286  /// code-completion cache.
287  ///
288  /// This is meant to avoid thrashing during reparsing, by not allowing the
289  /// code-completion cache to be updated on every reparse.
290  unsigned CacheCodeCompletionCoolDown;
291
292  /// \brief Bit used by CIndex to mark when a translation unit may be in an
293  /// inconsistent state, and is not safe to free.
294  unsigned UnsafeToFree : 1;
295
296  /// \brief Cache any "global" code-completion results, so that we can avoid
297  /// recomputing them with each completion.
298  void CacheCodeCompletionResults();
299
300  /// \brief Clear out and deallocate the cached code-completion results.
301  void ClearCachedCompletionResults();
302
303  ASTUnit(const ASTUnit&); // DO NOT IMPLEMENT
304  ASTUnit &operator=(const ASTUnit &); // DO NOT IMPLEMENT
305
306  explicit ASTUnit(bool MainFileIsAST);
307
308  void CleanTemporaryFiles();
309  bool Parse(llvm::MemoryBuffer *OverrideMainBuffer);
310
311  std::pair<llvm::MemoryBuffer *, std::pair<unsigned, bool> >
312  ComputePreamble(CompilerInvocation &Invocation,
313                  unsigned MaxLines, bool &CreatedBuffer);
314
315  llvm::MemoryBuffer *getMainBufferWithPrecompiledPreamble(
316                                         CompilerInvocation PreambleInvocation,
317                                                     bool AllowRebuild = true,
318                                                        unsigned MaxLines = 0);
319  void RealizeTopLevelDeclsFromPreamble();
320
321public:
322  class ConcurrencyCheck {
323    volatile ASTUnit &Self;
324
325  public:
326    explicit ConcurrencyCheck(ASTUnit &Self)
327      : Self(Self)
328    {
329      assert(Self.ConcurrencyCheckValue == CheckUnlocked &&
330             "Concurrent access to ASTUnit!");
331      Self.ConcurrencyCheckValue = CheckLocked;
332    }
333
334    ~ConcurrencyCheck() {
335      Self.ConcurrencyCheckValue = CheckUnlocked;
336    }
337  };
338  friend class ConcurrencyCheck;
339
340  ~ASTUnit();
341
342  bool isMainFileAST() const { return MainFileIsAST; }
343
344  bool isUnsafeToFree() const { return UnsafeToFree; }
345  void setUnsafeToFree(bool Value) { UnsafeToFree = Value; }
346
347  const Diagnostic &getDiagnostics() const { return *Diagnostics; }
348  Diagnostic &getDiagnostics()             { return *Diagnostics; }
349
350  const SourceManager &getSourceManager() const { return *SourceMgr; }
351        SourceManager &getSourceManager()       { return *SourceMgr; }
352
353  const Preprocessor &getPreprocessor() const { return *PP.get(); }
354        Preprocessor &getPreprocessor()       { return *PP.get(); }
355
356  const ASTContext &getASTContext() const { return *Ctx.get(); }
357        ASTContext &getASTContext()       { return *Ctx.get(); }
358
359  bool hasSema() const { return TheSema; }
360  Sema &getSema() const {
361    assert(TheSema && "ASTUnit does not have a Sema object!");
362    return *TheSema;
363  }
364
365  const FileManager &getFileManager() const { return *FileMgr; }
366        FileManager &getFileManager()       { return *FileMgr; }
367
368  const FileSystemOptions &getFileSystemOpts() const { return FileSystemOpts; }
369
370  const std::string &getOriginalSourceFileName();
371  const std::string &getASTFileName();
372
373  /// \brief Add a temporary file that the ASTUnit depends on.
374  ///
375  /// This file will be erased when the ASTUnit is destroyed.
376  void addTemporaryFile(const llvm::sys::Path &TempFile) {
377    TemporaryFiles.push_back(TempFile);
378  }
379
380  bool getOnlyLocalDecls() const { return OnlyLocalDecls; }
381
382  /// \brief Retrieve the maximum PCH level of declarations that a
383  /// traversal of the translation unit should consider.
384  unsigned getMaxPCHLevel() const;
385
386  void setLastASTLocation(ASTLocation ALoc) { LastLoc = ALoc; }
387  ASTLocation getLastASTLocation() const { return LastLoc; }
388
389
390  llvm::StringRef getMainFileName() const;
391
392  typedef std::vector<Decl *>::iterator top_level_iterator;
393
394  top_level_iterator top_level_begin() {
395    assert(!isMainFileAST() && "Invalid call for AST based ASTUnit!");
396    if (!TopLevelDeclsInPreamble.empty())
397      RealizeTopLevelDeclsFromPreamble();
398    return TopLevelDecls.begin();
399  }
400
401  top_level_iterator top_level_end() {
402    assert(!isMainFileAST() && "Invalid call for AST based ASTUnit!");
403    if (!TopLevelDeclsInPreamble.empty())
404      RealizeTopLevelDeclsFromPreamble();
405    return TopLevelDecls.end();
406  }
407
408  std::size_t top_level_size() const {
409    assert(!isMainFileAST() && "Invalid call for AST based ASTUnit!");
410    return TopLevelDeclsInPreamble.size() + TopLevelDecls.size();
411  }
412
413  bool top_level_empty() const {
414    assert(!isMainFileAST() && "Invalid call for AST based ASTUnit!");
415    return TopLevelDeclsInPreamble.empty() && TopLevelDecls.empty();
416  }
417
418  /// \brief Add a new top-level declaration.
419  void addTopLevelDecl(Decl *D) {
420    TopLevelDecls.push_back(D);
421  }
422
423  /// \brief Add a new top-level declaration, identified by its ID in
424  /// the precompiled preamble.
425  void addTopLevelDeclFromPreamble(serialization::DeclID D) {
426    TopLevelDeclsInPreamble.push_back(D);
427  }
428
429  /// \brief Retrieve the mapping from File IDs to the preprocessed entities
430  /// within that file.
431  PreprocessedEntitiesByFileMap &getPreprocessedEntitiesByFile() {
432    return PreprocessedEntitiesByFile;
433  }
434
435  // Retrieve the diagnostics associated with this AST
436  typedef const StoredDiagnostic *stored_diag_iterator;
437  stored_diag_iterator stored_diag_begin() const {
438    return StoredDiagnostics.begin();
439  }
440  stored_diag_iterator stored_diag_end() const {
441    return StoredDiagnostics.end();
442  }
443  unsigned stored_diag_size() const { return StoredDiagnostics.size(); }
444
445  llvm::SmallVector<StoredDiagnostic, 4> &getStoredDiagnostics() {
446    return StoredDiagnostics;
447  }
448
449  typedef std::vector<CachedCodeCompletionResult>::iterator
450    cached_completion_iterator;
451
452  cached_completion_iterator cached_completion_begin() {
453    return CachedCompletionResults.begin();
454  }
455
456  cached_completion_iterator cached_completion_end() {
457    return CachedCompletionResults.end();
458  }
459
460  unsigned cached_completion_size() const {
461    return CachedCompletionResults.size();
462  }
463
464  llvm::MemoryBuffer *getBufferForFile(llvm::StringRef Filename,
465                                       std::string *ErrorStr = 0);
466
467  /// \brief Whether this AST represents a complete translation unit.
468  ///
469  /// If false, this AST is only a partial translation unit, e.g., one
470  /// that might still be used as a precompiled header or preamble.
471  bool isCompleteTranslationUnit() const { return CompleteTranslationUnit; }
472
473  /// \brief A mapping from a file name to the memory buffer that stores the
474  /// remapped contents of that file.
475  typedef std::pair<std::string, const llvm::MemoryBuffer *> RemappedFile;
476
477  /// \brief Create an ASTUnit from an AST file.
478  ///
479  /// \param Filename - The AST file to load.
480  ///
481  /// \param Diags - The diagnostics engine to use for reporting errors; its
482  /// lifetime is expected to extend past that of the returned ASTUnit.
483  ///
484  /// \returns - The initialized ASTUnit or null if the AST failed to load.
485  static ASTUnit *LoadFromASTFile(const std::string &Filename,
486                                  llvm::IntrusiveRefCntPtr<Diagnostic> Diags,
487                                  const FileSystemOptions &FileSystemOpts,
488                                  bool OnlyLocalDecls = false,
489                                  RemappedFile *RemappedFiles = 0,
490                                  unsigned NumRemappedFiles = 0,
491                                  bool CaptureDiagnostics = false);
492
493private:
494  /// \brief Helper function for \c LoadFromCompilerInvocation() and
495  /// \c LoadFromCommandLine(), which loads an AST from a compiler invocation.
496  ///
497  /// \param PrecompilePreamble Whether to precompile the preamble of this
498  /// translation unit, to improve the performance of reparsing.
499  ///
500  /// \returns \c true if a catastrophic failure occurred (which means that the
501  /// \c ASTUnit itself is invalid), or \c false otherwise.
502  bool LoadFromCompilerInvocation(bool PrecompilePreamble);
503
504public:
505
506  /// LoadFromCompilerInvocation - Create an ASTUnit from a source file, via a
507  /// CompilerInvocation object.
508  ///
509  /// \param CI - The compiler invocation to use; it must have exactly one input
510  /// source file. The ASTUnit takes ownership of the CompilerInvocation object.
511  ///
512  /// \param Diags - The diagnostics engine to use for reporting errors; its
513  /// lifetime is expected to extend past that of the returned ASTUnit.
514  //
515  // FIXME: Move OnlyLocalDecls, UseBumpAllocator to setters on the ASTUnit, we
516  // shouldn't need to specify them at construction time.
517  static ASTUnit *LoadFromCompilerInvocation(CompilerInvocation *CI,
518                                     llvm::IntrusiveRefCntPtr<Diagnostic> Diags,
519                                             bool OnlyLocalDecls = false,
520                                             bool CaptureDiagnostics = false,
521                                             bool PrecompilePreamble = false,
522                                          bool CompleteTranslationUnit = true,
523                                       bool CacheCodeCompletionResults = false);
524
525  /// LoadFromCommandLine - Create an ASTUnit from a vector of command line
526  /// arguments, which must specify exactly one source file.
527  ///
528  /// \param ArgBegin - The beginning of the argument vector.
529  ///
530  /// \param ArgEnd - The end of the argument vector.
531  ///
532  /// \param Diags - The diagnostics engine to use for reporting errors; its
533  /// lifetime is expected to extend past that of the returned ASTUnit.
534  ///
535  /// \param ResourceFilesPath - The path to the compiler resource files.
536  //
537  // FIXME: Move OnlyLocalDecls, UseBumpAllocator to setters on the ASTUnit, we
538  // shouldn't need to specify them at construction time.
539  static ASTUnit *LoadFromCommandLine(const char **ArgBegin,
540                                      const char **ArgEnd,
541                                    llvm::IntrusiveRefCntPtr<Diagnostic> Diags,
542                                      llvm::StringRef ResourceFilesPath,
543                                      bool OnlyLocalDecls = false,
544                                      bool CaptureDiagnostics = false,
545                                      RemappedFile *RemappedFiles = 0,
546                                      unsigned NumRemappedFiles = 0,
547                                      bool PrecompilePreamble = false,
548                                      bool CompleteTranslationUnit = true,
549                                      bool CacheCodeCompletionResults = false,
550                                      bool CXXPrecompilePreamble = false,
551                                      bool CXXChainedPCH = false);
552
553  /// \brief Reparse the source files using the same command-line options that
554  /// were originally used to produce this translation unit.
555  ///
556  /// \returns True if a failure occurred that causes the ASTUnit not to
557  /// contain any translation-unit information, false otherwise.
558  bool Reparse(RemappedFile *RemappedFiles = 0,
559               unsigned NumRemappedFiles = 0);
560
561  /// \brief Perform code completion at the given file, line, and
562  /// column within this translation unit.
563  ///
564  /// \param File The file in which code completion will occur.
565  ///
566  /// \param Line The line at which code completion will occur.
567  ///
568  /// \param Column The column at which code completion will occur.
569  ///
570  /// \param IncludeMacros Whether to include macros in the code-completion
571  /// results.
572  ///
573  /// \param IncludeCodePatterns Whether to include code patterns (such as a
574  /// for loop) in the code-completion results.
575  ///
576  /// FIXME: The Diag, LangOpts, SourceMgr, FileMgr, StoredDiagnostics, and
577  /// OwnedBuffers parameters are all disgusting hacks. They will go away.
578  void CodeComplete(llvm::StringRef File, unsigned Line, unsigned Column,
579                    RemappedFile *RemappedFiles, unsigned NumRemappedFiles,
580                    bool IncludeMacros, bool IncludeCodePatterns,
581                    CodeCompleteConsumer &Consumer,
582                    Diagnostic &Diag, LangOptions &LangOpts,
583                    SourceManager &SourceMgr, FileManager &FileMgr,
584                    llvm::SmallVectorImpl<StoredDiagnostic> &StoredDiagnostics,
585              llvm::SmallVectorImpl<const llvm::MemoryBuffer *> &OwnedBuffers);
586
587  /// \brief Save this translation unit to a file with the given name.
588  ///
589  /// \returns True if an error occurred, false otherwise.
590  bool Save(llvm::StringRef File);
591};
592
593} // namespace clang
594
595#endif
596