ASTReader.h revision bef1a7b9c175d37e4a727e6ce68bd05232fa6970
1//===--- ASTReader.h - AST File Reader --------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the ASTReader class, which reads AST files.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_FRONTEND_AST_READER_H
15#define LLVM_CLANG_FRONTEND_AST_READER_H
16
17#include "clang/Serialization/ASTBitCodes.h"
18#include "clang/Sema/ExternalSemaSource.h"
19#include "clang/AST/DeclarationName.h"
20#include "clang/AST/DeclObjC.h"
21#include "clang/AST/TemplateBase.h"
22#include "clang/Lex/ExternalPreprocessorSource.h"
23#include "clang/Lex/PreprocessingRecord.h"
24#include "clang/Basic/Diagnostic.h"
25#include "clang/Basic/IdentifierTable.h"
26#include "clang/Basic/SourceManager.h"
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/APInt.h"
29#include "llvm/ADT/APSInt.h"
30#include "llvm/ADT/OwningPtr.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/ADT/StringRef.h"
33#include "llvm/Bitcode/BitstreamReader.h"
34#include "llvm/System/DataTypes.h"
35#include <deque>
36#include <map>
37#include <string>
38#include <utility>
39#include <vector>
40
// Forward declaration only: this header names llvm::MemoryBuffer solely as
// the pointee of an llvm::OwningPtr, so the full definition is not needed.
namespace llvm {
  class MemoryBuffer;
}
44
45namespace clang {
46
// Forward declarations. They are enough to name these types in pointers,
// references, and friend declarations without including their headers.
class AddrLabelExpr;
class ASTConsumer;
class ASTContext;
class ASTIdentifierIterator;
class Attr;
class Decl;
class DeclContext;
class NestedNameSpecifier;
class CXXBaseSpecifier;
class CXXBaseOrMemberInitializer;
class GotoStmt;
class LabelStmt;
class MacroDefinition;
class NamedDecl;
class Preprocessor;
class Sema;
class SwitchCase;
class ASTDeserializationListener;
class ASTReader;
class ASTDeclReader;
class ASTStmtReader;
class ASTIdentifierLookupTrait;
class TypeLocReader;
struct HeaderFileInfo;
71
72struct PCHPredefinesBlock {
73  /// \brief The file ID for this predefines buffer in a PCH file.
74  FileID BufferID;
75
76  /// \brief This predefines buffer in a PCH file.
77  llvm::StringRef Data;
78};
79typedef llvm::SmallVector<PCHPredefinesBlock, 2> PCHPredefinesBlocks;
80
81/// \brief Abstract interface for callback invocations by the ASTReader.
82///
83/// While reading an AST file, the ASTReader will call the methods of the
84/// listener to pass on specific information. Some of the listener methods can
85/// return true to indicate to the ASTReader that the information (and
86/// consequently the AST file) is invalid.
87class ASTReaderListener {
88public:
89  virtual ~ASTReaderListener();
90
91  /// \brief Receives the language options.
92  ///
93  /// \returns true to indicate the options are invalid or false otherwise.
94  virtual bool ReadLanguageOptions(const LangOptions &LangOpts) {
95    return false;
96  }
97
98  /// \brief Receives the target triple.
99  ///
100  /// \returns true to indicate the target triple is invalid or false otherwise.
101  virtual bool ReadTargetTriple(llvm::StringRef Triple) {
102    return false;
103  }
104
105  /// \brief Receives the contents of the predefines buffer.
106  ///
107  /// \param Buffers Information about the predefines buffers.
108  ///
109  /// \param OriginalFileName The original file name for the AST file, which
110  /// will appear as an entry in the predefines buffer.
111  ///
112  /// \param SuggestedPredefines If necessary, additional definitions are added
113  /// here.
114  ///
115  /// \returns true to indicate the predefines are invalid or false otherwise.
116  virtual bool ReadPredefinesBuffer(const PCHPredefinesBlocks &Buffers,
117                                    llvm::StringRef OriginalFileName,
118                                    std::string &SuggestedPredefines) {
119    return false;
120  }
121
122  /// \brief Receives a HeaderFileInfo entry.
123  virtual void ReadHeaderFileInfo(const HeaderFileInfo &HFI, unsigned ID) {}
124
125  /// \brief Receives __COUNTER__ value.
126  virtual void ReadCounter(unsigned Value) {}
127};
128
129/// \brief ASTReaderListener implementation to validate the information of
130/// the PCH file against an initialized Preprocessor.
131class PCHValidator : public ASTReaderListener {
132  Preprocessor &PP;
133  ASTReader &Reader;
134
135  unsigned NumHeaderInfos;
136
137public:
138  PCHValidator(Preprocessor &PP, ASTReader &Reader)
139    : PP(PP), Reader(Reader), NumHeaderInfos(0) {}
140
141  virtual bool ReadLanguageOptions(const LangOptions &LangOpts);
142  virtual bool ReadTargetTriple(llvm::StringRef Triple);
143  virtual bool ReadPredefinesBuffer(const PCHPredefinesBlocks &Buffers,
144                                    llvm::StringRef OriginalFileName,
145                                    std::string &SuggestedPredefines);
146  virtual void ReadHeaderFileInfo(const HeaderFileInfo &HFI, unsigned ID);
147  virtual void ReadCounter(unsigned Value);
148
149private:
150  void Error(const char *Msg);
151};
152
153/// \brief Reads an AST files chain containing the contents of a translation
154/// unit.
155///
156/// The ASTReader class reads bitstreams (produced by the ASTWriter
157/// class) containing the serialized representation of a given
158/// abstract syntax tree and its supporting data structures. An
159/// instance of the ASTReader can be attached to an ASTContext object,
160/// which will provide access to the contents of the AST files.
161///
162/// The AST reader provides lazy de-serialization of declarations, as
163/// required when traversing the AST. Only those AST nodes that are
164/// actually required will be de-serialized.
165class ASTReader
166  : public ExternalPreprocessorSource,
167    public ExternalPreprocessingRecordSource,
168    public ExternalSemaSource,
169    public IdentifierInfoLookup,
170    public ExternalIdentifierLookup,
171    public ExternalSLocEntrySource {
172public:
173  enum ASTReadResult { Success, Failure, IgnorePCH };
174  /// \brief Types of AST files.
175  enum ASTFileType {
176    Module,   ///< File is a module proper.
177    PCH,      ///< File is a PCH file treated as such.
178    Preamble, ///< File is a PCH file treated as the preamble.
179    MainFile  ///< File is a PCH file treated as the actual main file.
180  };
181  friend class PCHValidator;
182  friend class ASTDeclReader;
183  friend class ASTStmtReader;
184  friend class ASTIdentifierIterator;
185  friend class ASTIdentifierLookupTrait;
186  friend class TypeLocReader;
187private:
188  /// \brief The receiver of some callbacks invoked by ASTReader.
189  llvm::OwningPtr<ASTReaderListener> Listener;
190
191  /// \brief The receiver of deserialization events.
192  ASTDeserializationListener *DeserializationListener;
193
194  SourceManager &SourceMgr;
195  FileManager &FileMgr;
196  Diagnostic &Diags;
197
198  /// \brief The semantic analysis object that will be processing the
199  /// AST files and the translation unit that uses it.
200  Sema *SemaObj;
201
202  /// \brief The preprocessor that will be loading the source file.
203  Preprocessor *PP;
204
205  /// \brief The AST context into which we'll read the AST files.
206  ASTContext *Context;
207
208  /// \brief The AST consumer.
209  ASTConsumer *Consumer;
210
211  /// \brief Information that is needed for every module.
212  struct PerFileData {
213    PerFileData(ASTFileType Ty);
214    ~PerFileData();
215
216    // === General information ===
217
218    /// \brief The type of this AST file.
219    ASTFileType Type;
220
221    /// \brief The file name of the AST file.
222    std::string FileName;
223
224    /// \brief The memory buffer that stores the data associated with
225    /// this AST file.
226    llvm::OwningPtr<llvm::MemoryBuffer> Buffer;
227
228    /// \brief The size of this file, in bits.
229    uint64_t SizeInBits;
230
231    /// \brief The bitstream reader from which we'll read the AST file.
232    llvm::BitstreamReader StreamFile;
233
234    /// \brief The main bitstream cursor for the main block.
235    llvm::BitstreamCursor Stream;
236
237    // === Source Locations ===
238
239    /// \brief Cursor used to read source location entries.
240    llvm::BitstreamCursor SLocEntryCursor;
241
242    /// \brief The number of source location entries in this AST file.
243    unsigned LocalNumSLocEntries;
244
245    /// \brief Offsets for all of the source location entries in the
246    /// AST file.
247    const uint32_t *SLocOffsets;
248
249    /// \brief The entire size of this module's source location offset range.
250    unsigned LocalSLocSize;
251
252    // === Identifiers ===
253
254    /// \brief The number of identifiers in this AST file.
255    unsigned LocalNumIdentifiers;
256
257    /// \brief Offsets into the identifier table data.
258    ///
259    /// This array is indexed by the identifier ID (-1), and provides
260    /// the offset into IdentifierTableData where the string data is
261    /// stored.
262    const uint32_t *IdentifierOffsets;
263
264    /// \brief Actual data for the on-disk hash table.
265    ///
266    /// This pointer points into a memory buffer, where the on-disk hash
267    /// table for identifiers actually lives.
268    const char *IdentifierTableData;
269
270    /// \brief A pointer to an on-disk hash table of opaque type
271    /// IdentifierHashTable.
272    void *IdentifierLookupTable;
273
274    // === Macros ===
275
276    /// \brief The cursor to the start of the preprocessor block, which stores
277    /// all of the macro definitions.
278    llvm::BitstreamCursor MacroCursor;
279
280    /// \brief The offset of the start of the set of defined macros.
281    uint64_t MacroStartOffset;
282
283    /// \brief The number of macro definitions in this file.
284    unsigned LocalNumMacroDefinitions;
285
286    /// \brief Offsets of all of the macro definitions in the preprocessing
287    /// record in the AST file.
288    const uint32_t *MacroDefinitionOffsets;
289
290    // === Selectors ===
291
292    /// \brief The number of selectors new to this file.
293    ///
294    /// This is the number of entries in SelectorOffsets.
295    unsigned LocalNumSelectors;
296
297    /// \brief Offsets into the selector lookup table's data array
298    /// where each selector resides.
299    const uint32_t *SelectorOffsets;
300
301    /// \brief A pointer to the character data that comprises the selector table
302    ///
303    /// The SelectorOffsets table refers into this memory.
304    const unsigned char *SelectorLookupTableData;
305
306    /// \brief A pointer to an on-disk hash table of opaque type
307    /// ASTSelectorLookupTable.
308    ///
309    /// This hash table provides the IDs of all selectors, and the associated
310    /// instance and factory methods.
311    void *SelectorLookupTable;
312
313    /// \brief Method selectors used in a @selector expression. Used for
314    /// implementation of -Wselector.
315    llvm::SmallVector<uint64_t, 64> ReferencedSelectorsData;
316
317    // === Declarations ===
318
319    /// DeclsCursor - This is a cursor to the start of the DECLS_BLOCK block. It
320    /// has read all the abbreviations at the start of the block and is ready to
321    /// jump around with these in context.
322    llvm::BitstreamCursor DeclsCursor;
323
324    /// \brief The number of declarations in this AST file.
325    unsigned LocalNumDecls;
326
327    /// \brief Offset of each declaration within the bitstream, indexed
328    /// by the declaration ID (-1).
329    const uint32_t *DeclOffsets;
330
331    /// \brief A snapshot of the pending instantiations in the chain.
332    ///
333    /// This record tracks the instantiations that Sema has to perform at the
334    /// end of the TU. It consists of a pair of values for every pending
335    /// instantiation where the first value is the ID of the decl and the second
336    /// is the instantiation location.
337    llvm::SmallVector<uint64_t, 64> PendingInstantiations;
338
339    // === Types ===
340
341    /// \brief The number of types in this AST file.
342    unsigned LocalNumTypes;
343
344    /// \brief Offset of each type within the bitstream, indexed by the
345    /// type ID, or the representation of a Type*.
346    const uint32_t *TypeOffsets;
347
348    // === Miscellaneous ===
349
350    /// \brief The AST stat cache installed for this file, if any.
351    ///
352    /// The dynamic type of this stat cache is always ASTStatCache
353    void *StatCache;
354
355    /// \brief The number of preallocated preprocessing entities in the
356    /// preprocessing record.
357    unsigned NumPreallocatedPreprocessingEntities;
358
359    /// \brief The next module in source order.
360    PerFileData *NextInSource;
361
362    /// \brief All the modules that loaded this one. Can contain NULL for
363    /// directly loaded modules.
364    llvm::SmallVector<PerFileData *, 1> Loaders;
365  };
366
367  /// \brief All loaded modules, indexed by name.
368  llvm::StringMap<PerFileData*> Modules;
369
370  /// \brief The first module in source order.
371  PerFileData *FirstInSource;
372
373  /// \brief The chain of AST files. The first entry is the one named by the
374  /// user, the last one is the one that doesn't depend on anything further.
375  /// That is, the entry I was created with -include-pch I+1.
376  llvm::SmallVector<PerFileData*, 2> Chain;
377
378  /// \brief SLocEntries that we're going to preload.
379  llvm::SmallVector<uint64_t, 64> PreloadSLocEntries;
380
381  /// \brief Types that have already been loaded from the chain.
382  ///
383  /// When the pointer at index I is non-NULL, the type with
384  /// ID = (I + 1) << FastQual::Width has already been loaded
385  std::vector<QualType> TypesLoaded;
386
387  /// \brief Map that provides the ID numbers of each type within the
388  /// output stream, plus those deserialized from a chained PCH.
389  ///
390  /// The ID numbers of types are consecutive (in order of discovery)
391  /// and start at 1. 0 is reserved for NULL. When types are actually
392  /// stored in the stream, the ID number is shifted by 2 bits to
393  /// allow for the const/volatile qualifiers.
394  ///
395  /// Keys in the map never have const/volatile qualifiers.
396  serialization::TypeIdxMap TypeIdxs;
397
398  /// \brief Declarations that have already been loaded from the chain.
399  ///
400  /// When the pointer at index I is non-NULL, the declaration with ID
401  /// = I + 1 has already been loaded.
402  std::vector<Decl *> DeclsLoaded;
403
404  typedef std::pair<PerFileData *, uint64_t> FileOffset;
405  typedef llvm::SmallVector<FileOffset, 2> FileOffsetsTy;
406  typedef llvm::DenseMap<serialization::DeclID, FileOffsetsTy>
407      DeclUpdateOffsetsMap;
408  /// \brief Declarations that have modifications residing in a later file
409  /// in the chain.
410  DeclUpdateOffsetsMap DeclUpdateOffsets;
411
412  typedef llvm::DenseMap<serialization::DeclID,
413                         std::pair<PerFileData *, uint64_t> >
414      DeclReplacementMap;
415  /// \brief Declarations that have been replaced in a later file in the chain.
416  DeclReplacementMap ReplacedDecls;
417
418  /// \brief Information about the contents of a DeclContext.
419  struct DeclContextInfo {
420    void *NameLookupTableData; // a ASTDeclContextNameLookupTable.
421    const serialization::KindDeclIDPair *LexicalDecls;
422    unsigned NumLexicalDecls;
423  };
424  // In a full chain, there could be multiple updates to every decl context,
425  // so this is a vector. However, typically a chain is only two elements long,
426  // with only one file containing updates, so there will be only one update
427  // per decl context.
428  typedef llvm::SmallVector<DeclContextInfo, 1> DeclContextInfos;
429  typedef llvm::DenseMap<const DeclContext *, DeclContextInfos>
430      DeclContextOffsetsMap;
431  // Updates for visible decls can occur for other contexts than just the
432  // TU, and when we read those update records, the actual context will not
433  // be available yet (unless it's the TU), so have this pending map using the
434  // ID as a key. It will be realized when the context is actually loaded.
435  typedef llvm::SmallVector<void *, 1> DeclContextVisibleUpdates;
436  typedef llvm::DenseMap<serialization::DeclID, DeclContextVisibleUpdates>
437      DeclContextVisibleUpdatesPending;
438
439  /// \brief Offsets of the lexical and visible declarations for each
440  /// DeclContext.
441  DeclContextOffsetsMap DeclContextOffsets;
442
443  /// \brief Updates to the visible declarations of declaration contexts that
444  /// haven't been loaded yet.
445  DeclContextVisibleUpdatesPending PendingVisibleUpdates;
446
447  typedef llvm::SmallVector<CXXRecordDecl *, 4> ForwardRefs;
448  typedef llvm::DenseMap<const CXXRecordDecl *, ForwardRefs>
449      PendingForwardRefsMap;
450  /// \brief Forward references that have a definition but the definition decl
451  /// is still initializing. When the definition gets read it will update
452  /// the DefinitionData pointer of all pending references.
453  PendingForwardRefsMap PendingForwardRefs;
454
455  typedef llvm::DenseMap<serialization::DeclID, serialization::DeclID>
456      FirstLatestDeclIDMap;
457  /// \brief Map of first declarations from a chained PCH that point to the
458  /// most recent declarations in another AST file.
459  FirstLatestDeclIDMap FirstLatestDeclIDs;
460
461  /// \brief Read the records that describe the contents of declcontexts.
462  bool ReadDeclContextStorage(llvm::BitstreamCursor &Cursor,
463                              const std::pair<uint64_t, uint64_t> &Offsets,
464                              DeclContextInfo &Info);
465
466  /// \brief A vector containing identifiers that have already been
467  /// loaded.
468  ///
469  /// If the pointer at index I is non-NULL, then it refers to the
470  /// IdentifierInfo for the identifier with ID=I+1 that has already
471  /// been loaded.
472  std::vector<IdentifierInfo *> IdentifiersLoaded;
473
474  /// \brief A vector containing selectors that have already been loaded.
475  ///
476  /// This vector is indexed by the Selector ID (-1). NULL selector
477  /// entries indicate that the particular selector ID has not yet
478  /// been loaded.
479  llvm::SmallVector<Selector, 16> SelectorsLoaded;
480
481  /// \brief The macro definitions we have already loaded.
482  llvm::SmallVector<MacroDefinition *, 16> MacroDefinitionsLoaded;
483
484  /// \name CodeGen-relevant special data
485  /// \brief Fields containing data that is relevant to CodeGen.
486  //@{
487
488  /// \brief The IDs of all declarations that fulfill the criteria of
489  /// "interesting" decls.
490  ///
491  /// This contains the data loaded from all EXTERNAL_DEFINITIONS blocks in the
492  /// chain. The referenced declarations are deserialized and passed to the
493  /// consumer eagerly.
494  llvm::SmallVector<uint64_t, 16> ExternalDefinitions;
495
  /// \brief The IDs of all tentative definitions stored in the chain.
497  ///
498  /// Sema keeps track of all tentative definitions in a TU because it has to
499  /// complete them and pass them on to CodeGen. Thus, tentative definitions in
500  /// the PCH chain must be eagerly deserialized.
501  llvm::SmallVector<uint64_t, 16> TentativeDefinitions;
502
503  /// \brief The IDs of all CXXRecordDecls stored in the chain whose VTables are
504  /// used.
505  ///
506  /// CodeGen has to emit VTables for these records, so they have to be eagerly
507  /// deserialized.
508  llvm::SmallVector<uint64_t, 64> VTableUses;
509
510  //@}
511
512  /// \name Diagnostic-relevant special data
513  /// \brief Fields containing data that is used for generating diagnostics
514  //@{
515
516  /// \brief A snapshot of Sema's unused file-scoped variable tracking, for
517  /// generating warnings.
518  llvm::SmallVector<uint64_t, 16> UnusedFileScopedDecls;
519
520  /// \brief A snapshot of Sema's weak undeclared identifier tracking, for
521  /// generating warnings.
522  llvm::SmallVector<uint64_t, 64> WeakUndeclaredIdentifiers;
523
524  /// \brief The IDs of type aliases for ext_vectors that exist in the chain.
525  ///
526  /// Used by Sema for finding sugared names for ext_vectors in diagnostics.
527  llvm::SmallVector<uint64_t, 4> ExtVectorDecls;
528
529  //@}
530
531  /// \name Sema-relevant special data
532  /// \brief Fields containing data that is used for semantic analysis
533  //@{
534
535  /// \brief The IDs of all locally scoped external decls in the chain.
536  ///
537  /// Sema tracks these to validate that the types are consistent across all
538  /// local external declarations.
539  llvm::SmallVector<uint64_t, 16> LocallyScopedExternalDecls;
540
541  /// \brief The IDs of all dynamic class declarations in the chain.
542  ///
543  /// Sema tracks these because it checks for the key functions being defined
544  /// at the end of the TU, in which case it directs CodeGen to emit the VTable.
545  llvm::SmallVector<uint64_t, 16> DynamicClasses;
546
547  /// \brief The IDs of the declarations Sema stores directly.
548  ///
549  /// Sema tracks a few important decls, such as namespace std, directly.
550  llvm::SmallVector<uint64_t, 4> SemaDeclRefs;
551
552  /// \brief The IDs of the types ASTContext stores directly.
553  ///
554  /// The AST context tracks a few important types, such as va_list, directly.
555  llvm::SmallVector<uint64_t, 16> SpecialTypes;
556
557  //@}
558
559  /// \brief The original file name that was used to build the primary AST file,
560  /// which may have been modified for relocatable-pch support.
561  std::string OriginalFileName;
562
563  /// \brief The actual original file name that was used to build the primary
564  /// AST file.
565  std::string ActualOriginalFileName;
566
567  /// \brief Whether this precompiled header is a relocatable PCH file.
568  bool RelocatablePCH;
569
570  /// \brief The system include root to be used when loading the
571  /// precompiled header.
572  const char *isysroot;
573
574  /// \brief Whether to disable the normal validation performed on precompiled
575  /// headers when they are loaded.
576  bool DisableValidation;
577
578  /// \brief Mapping from switch-case IDs in the chain to switch-case statements
579  ///
580  /// Statements usually don't have IDs, but switch cases need them, so that the
581  /// switch statement can refer to them.
582  std::map<unsigned, SwitchCase *> SwitchCaseStmts;
583
584  /// \brief Mapping from label statement IDs in the chain to label statements.
585  ///
586  /// Statements usually don't have IDs, but labeled statements need them, so
587  /// that goto statements and address-of-label expressions can refer to them.
588  std::map<unsigned, LabelStmt *> LabelStmts;
589
590  /// \brief Mapping from label IDs to the set of "goto" statements
591  /// that point to that label before the label itself has been
592  /// de-serialized.
593  std::multimap<unsigned, GotoStmt *> UnresolvedGotoStmts;
594
595  /// \brief Mapping from label IDs to the set of address label
596  /// expressions that point to that label before the label itself has
597  /// been de-serialized.
598  std::multimap<unsigned, AddrLabelExpr *> UnresolvedAddrLabelExprs;
599
600  /// \brief The number of stat() calls that hit/missed the stat
601  /// cache.
602  unsigned NumStatHits, NumStatMisses;
603
604  /// \brief The number of source location entries de-serialized from
605  /// the PCH file.
606  unsigned NumSLocEntriesRead;
607
608  /// \brief The number of source location entries in the chain.
609  unsigned TotalNumSLocEntries;
610
611  /// \brief The next offset for a SLocEntry after everything in this reader.
612  unsigned NextSLocOffset;
613
614  /// \brief The number of statements (and expressions) de-serialized
615  /// from the chain.
616  unsigned NumStatementsRead;
617
618  /// \brief The total number of statements (and expressions) stored
619  /// in the chain.
620  unsigned TotalNumStatements;
621
622  /// \brief The number of macros de-serialized from the chain.
623  unsigned NumMacrosRead;
624
625  /// \brief The total number of macros stored in the chain.
626  unsigned TotalNumMacros;
627
628  /// \brief The number of selectors that have been read.
629  unsigned NumSelectorsRead;
630
631  /// \brief The number of method pool entries that have been read.
632  unsigned NumMethodPoolEntriesRead;
633
634  /// \brief The number of times we have looked up a selector in the method
635  /// pool and not found anything interesting.
636  unsigned NumMethodPoolMisses;
637
638  /// \brief The total number of method pool entries in the selector table.
639  unsigned TotalNumMethodPoolEntries;
640
641  /// Number of lexical decl contexts read/total.
642  unsigned NumLexicalDeclContextsRead, TotalLexicalDeclContexts;
643
644  /// Number of visible decl contexts read/total.
645  unsigned NumVisibleDeclContextsRead, TotalVisibleDeclContexts;
646
647  /// \brief Number of Decl/types that are currently deserializing.
648  unsigned NumCurrentElementsDeserializing;
649
650  /// \brief An IdentifierInfo that has been loaded but whose top-level
651  /// declarations of the same name have not (yet) been loaded.
652  struct PendingIdentifierInfo {
653    IdentifierInfo *II;
654    llvm::SmallVector<uint32_t, 4> DeclIDs;
655  };
656
657  /// \brief The set of identifiers that were read while the AST reader was
658  /// (recursively) loading declarations.
659  ///
660  /// The declarations on the identifier chain for these identifiers will be
661  /// loaded once the recursive loading has completed.
662  std::deque<PendingIdentifierInfo> PendingIdentifierInfos;
663
664  /// \brief Contains declarations and definitions that will be
665  /// "interesting" to the ASTConsumer, when we get that AST consumer.
666  ///
667  /// "Interesting" declarations are those that have data that may
668  /// need to be emitted, such as inline function definitions or
669  /// Objective-C protocols.
670  std::deque<Decl *> InterestingDecls;
671
672  /// \brief When reading a Stmt tree, Stmt operands are placed in this stack.
673  llvm::SmallVector<Stmt *, 16> StmtStack;
674
675  /// \brief What kind of records we are reading.
676  enum ReadingKind {
677    Read_Decl, Read_Type, Read_Stmt
678  };
679
680  /// \brief What kind of records we are reading.
681  ReadingKind ReadingKind;
682
683  /// \brief RAII object to change the reading kind.
684  class ReadingKindTracker {
685    ASTReader &Reader;
686    enum ReadingKind PrevKind;
687
688    ReadingKindTracker(const ReadingKindTracker&); // do not implement
689    ReadingKindTracker &operator=(const ReadingKindTracker&);// do not implement
690
691  public:
692    ReadingKindTracker(enum ReadingKind newKind, ASTReader &reader)
693      : Reader(reader), PrevKind(Reader.ReadingKind) {
694      Reader.ReadingKind = newKind;
695    }
696
697    ~ReadingKindTracker() { Reader.ReadingKind = PrevKind; }
698  };
699
700  /// \brief All predefines buffers in the chain, to be treated as if
701  /// concatenated.
702  PCHPredefinesBlocks PCHPredefinesBuffers;
703
704  /// \brief Suggested contents of the predefines buffer, after this
705  /// PCH file has been processed.
706  ///
707  /// In most cases, this string will be empty, because the predefines
708  /// buffer computed to build the PCH file will be identical to the
709  /// predefines buffer computed from the command line. However, when
710  /// there are differences that the PCH reader can work around, this
711  /// predefines buffer may contain additional definitions.
712  std::string SuggestedPredefines;
713
714  /// \brief Reads a statement from the specified cursor.
715  Stmt *ReadStmtFromStream(PerFileData &F);
716
717  void MaybeAddSystemRootToFilename(std::string &Filename);
718
719  ASTReadResult ReadASTCore(llvm::StringRef FileName, ASTFileType Type);
720  ASTReadResult ReadASTBlock(PerFileData &F);
721  bool CheckPredefinesBuffers();
722  bool ParseLineTable(PerFileData &F, llvm::SmallVectorImpl<uint64_t> &Record);
723  ASTReadResult ReadSourceManagerBlock(PerFileData &F);
724  ASTReadResult ReadSLocEntryRecord(unsigned ID);
725  PerFileData *SLocCursorForID(unsigned ID);
726  SourceLocation getImportLocation(PerFileData *F);
727  bool ParseLanguageOptions(const llvm::SmallVectorImpl<uint64_t> &Record);
728
729  struct RecordLocation {
730    RecordLocation(PerFileData *M, uint64_t O)
731      : F(M), Offset(O) {}
732    PerFileData *F;
733    uint64_t Offset;
734  };
735
736  QualType ReadTypeRecord(unsigned Index);
737  RecordLocation TypeCursorForIndex(unsigned Index);
738  void LoadedDecl(unsigned Index, Decl *D);
739  Decl *ReadDeclRecord(unsigned Index, serialization::DeclID ID);
740  RecordLocation DeclCursorForIndex(unsigned Index, serialization::DeclID ID);
741
742  void PassInterestingDeclsToConsumer();
743
  /// \brief Produce an error diagnostic.
745  ///
746  /// This routine should only be used for fatal errors that have to
747  /// do with non-routine failures (e.g., corrupted AST file).
748  void Error(const char *Msg);
749
750  ASTReader(const ASTReader&); // do not implement
751  ASTReader &operator=(const ASTReader &); // do not implement
752public:
753  typedef llvm::SmallVector<uint64_t, 64> RecordData;
754
755  /// \brief Load the AST file and validate its contents against the given
756  /// Preprocessor.
757  ///
758  /// \param PP the preprocessor associated with the context in which this
759  /// precompiled header will be loaded.
760  ///
761  /// \param Context the AST context that this precompiled header will be
762  /// loaded into.
763  ///
  /// \param isysroot If non-NULL, the system include path specified by the
  /// user. This is only used with relocatable PCH files. If NULL,
  /// a relocatable PCH file will use the default path "/".
767  ///
768  /// \param DisableValidation If true, the AST reader will suppress most
769  /// of its regular consistency checking, allowing the use of precompiled
770  /// headers that cannot be determined to be compatible.
771  ASTReader(Preprocessor &PP, ASTContext *Context, const char *isysroot = 0,
772            bool DisableValidation = false);
773
774  /// \brief Load the AST file without using any pre-initialized Preprocessor.
775  ///
776  /// The necessary information to initialize a Preprocessor later can be
  /// obtained by setting an ASTReaderListener.
778  ///
779  /// \param SourceMgr the source manager into which the AST file will be loaded
780  ///
781  /// \param FileMgr the file manager into which the AST file will be loaded.
782  ///
783  /// \param Diags the diagnostics system to use for reporting errors and
784  /// warnings relevant to loading the AST file.
785  ///
  /// \param isysroot If non-NULL, the system include path specified by the
  /// user. This is only used with relocatable PCH files. If NULL,
  /// a relocatable PCH file will use the default path "/".
789  ///
790  /// \param DisableValidation If true, the AST reader will suppress most
791  /// of its regular consistency checking, allowing the use of precompiled
792  /// headers that cannot be determined to be compatible.
793      ASTReader(SourceManager &SourceMgr, FileManager &FileMgr,
794            Diagnostic &Diags, const char *isysroot = 0,
795            bool DisableValidation = false);
796  ~ASTReader();
797
798  /// \brief Load the precompiled header designated by the given file
799  /// name.
800  ASTReadResult ReadAST(const std::string &FileName, ASTFileType Type);
801
802  /// \brief Set the AST callbacks listener.
803  void setListener(ASTReaderListener *listener) {
804    Listener.reset(listener);
805  }
806
807  /// \brief Set the AST deserialization listener.
808  void setDeserializationListener(ASTDeserializationListener *Listener);
809
810  /// \brief Set the Preprocessor to use.
811  void setPreprocessor(Preprocessor &pp);
812
813  /// \brief Sets and initializes the given Context.
814  void InitializeContext(ASTContext &Context);
815
816  /// \brief Retrieve the name of the named (primary) AST file
817  const std::string &getFileName() const { return Chain[0]->FileName; }
818
819  /// \brief Retrieve the name of the original source file name
820  const std::string &getOriginalSourceFile() { return OriginalFileName; }
821
822  /// \brief Retrieve the name of the original source file directly from
823  /// the AST file, without actually loading the AST file.
824  static std::string getOriginalSourceFile(const std::string &ASTFileName,
825                                           Diagnostic &Diags);
826
827  /// \brief Returns the suggested contents of the predefines buffer,
828  /// which contains a (typically-empty) subset of the predefines
829  /// build prior to including the precompiled header.
830  const std::string &getSuggestedPredefines() { return SuggestedPredefines; }
831
832  /// \brief Read preprocessed entities into the preprocessing record.
833  virtual void ReadPreprocessedEntities();
834
835  /// \brief Returns the number of source locations found in the chain.
836  unsigned getTotalNumSLocs() const {
837    return TotalNumSLocEntries;
838  }
839
840  /// \brief Returns the next SLocEntry offset after the chain.
841  unsigned getNextSLocOffset() const {
842    return NextSLocOffset;
843  }
844
845  /// \brief Returns the number of identifiers found in the chain.
846  unsigned getTotalNumIdentifiers() const {
847    return static_cast<unsigned>(IdentifiersLoaded.size());
848  }
849
850  /// \brief Returns the number of types found in the chain.
851  unsigned getTotalNumTypes() const {
852    return static_cast<unsigned>(TypesLoaded.size());
853  }
854
855  /// \brief Returns the number of declarations found in the chain.
856  unsigned getTotalNumDecls() const {
857    return static_cast<unsigned>(DeclsLoaded.size());
858  }
859
860  /// \brief Returns the number of selectors found in the chain.
861  unsigned getTotalNumSelectors() const {
862    return static_cast<unsigned>(SelectorsLoaded.size());
863  }
864
865  /// \brief Returns the number of macro definitions found in the chain.
866  unsigned getTotalNumMacroDefinitions() const {
867    return static_cast<unsigned>(MacroDefinitionsLoaded.size());
868  }
869
870  /// \brief Reads a TemplateArgumentLocInfo appropriate for the
871  /// given TemplateArgument kind.
872  TemplateArgumentLocInfo
873  GetTemplateArgumentLocInfo(PerFileData &F, TemplateArgument::ArgKind Kind,
874                             const RecordData &Record, unsigned &Idx);
875
876  /// \brief Reads a TemplateArgumentLoc.
877  TemplateArgumentLoc
878  ReadTemplateArgumentLoc(PerFileData &F,
879                          const RecordData &Record, unsigned &Idx);
880
881  /// \brief Reads a declarator info from the given record.
882  TypeSourceInfo *GetTypeSourceInfo(PerFileData &F,
883                                    const RecordData &Record, unsigned &Idx);
884
885  /// \brief Resolve and return the translation unit declaration.
886  TranslationUnitDecl *GetTranslationUnitDecl();
887
888  /// \brief Resolve a type ID into a type, potentially building a new
889  /// type.
890  QualType GetType(serialization::TypeID ID);
891
892  /// \brief Returns the type ID associated with the given type.
893  /// If the type didn't come from the AST file the ID that is returned is
894  /// marked as "doesn't exist in AST".
895  serialization::TypeID GetTypeID(QualType T) const;
896
897  /// \brief Returns the type index associated with the given type.
898  /// If the type didn't come from the AST file the index that is returned is
899  /// marked as "doesn't exist in AST".
900  serialization::TypeIdx GetTypeIdx(QualType T) const;
901
902  /// \brief Resolve a declaration ID into a declaration, potentially
903  /// building a new declaration.
904  Decl *GetDecl(serialization::DeclID ID);
905  virtual Decl *GetExternalDecl(uint32_t ID);
906
907  /// \brief Resolve the offset of a statement into a statement.
908  ///
909  /// This operation will read a new statement from the external
910  /// source each time it is called, and is meant to be used via a
911  /// LazyOffsetPtr (which is used by Decls for the body of functions, etc).
912  virtual Stmt *GetExternalDeclStmt(uint64_t Offset);
913
914  /// ReadBlockAbbrevs - Enter a subblock of the specified BlockID with the
915  /// specified cursor.  Read the abbreviations that are at the top of the block
916  /// and then leave the cursor pointing into the block.
917  bool ReadBlockAbbrevs(llvm::BitstreamCursor &Cursor, unsigned BlockID);
918
919  /// \brief Finds all the visible declarations with a given name.
920  /// The current implementation of this method just loads the entire
921  /// lookup table as unmaterialized references.
922  virtual DeclContext::lookup_result
923  FindExternalVisibleDeclsByName(const DeclContext *DC,
924                                 DeclarationName Name);
925
926  virtual void MaterializeVisibleDecls(const DeclContext *DC);
927
928  /// \brief Read all of the declarations lexically stored in a
929  /// declaration context.
930  ///
931  /// \param DC The declaration context whose declarations will be
932  /// read.
933  ///
934  /// \param Decls Vector that will contain the declarations loaded
935  /// from the external source. The caller is responsible for merging
936  /// these declarations with any declarations already stored in the
937  /// declaration context.
938  ///
939  /// \returns true if there was an error while reading the
940  /// declarations for this declaration context.
941  virtual bool FindExternalLexicalDecls(const DeclContext *DC,
942                                        bool (*isKindWeWant)(Decl::Kind),
943                                        llvm::SmallVectorImpl<Decl*> &Decls);
944
945  /// \brief Notify ASTReader that we started deserialization of
946  /// a decl or type so until FinishedDeserializing is called there may be
947  /// decls that are initializing. Must be paired with FinishedDeserializing.
948  virtual void StartedDeserializing() { ++NumCurrentElementsDeserializing; }
949
950  /// \brief Notify ASTReader that we finished the deserialization of
951  /// a decl or type. Must be paired with StartedDeserializing.
952  virtual void FinishedDeserializing();
953
954  /// \brief Function that will be invoked when we begin parsing a new
955  /// translation unit involving this external AST source.
956  ///
957  /// This function will provide all of the external definitions to
958  /// the ASTConsumer.
959  virtual void StartTranslationUnit(ASTConsumer *Consumer);
960
961  /// \brief Print some statistics about AST usage.
962  virtual void PrintStats();
963
964  /// \brief Initialize the semantic source with the Sema instance
965  /// being used to perform semantic analysis on the abstract syntax
966  /// tree.
967  virtual void InitializeSema(Sema &S);
968
969  /// \brief Inform the semantic consumer that Sema is no longer available.
970  virtual void ForgetSema() { SemaObj = 0; }
971
972  /// \brief Retrieve the IdentifierInfo for the named identifier.
973  ///
974  /// This routine builds a new IdentifierInfo for the given identifier. If any
975  /// declarations with this name are visible from translation unit scope, their
976  /// declarations will be deserialized and introduced into the declaration
977  /// chain of the identifier.
978  virtual IdentifierInfo *get(const char *NameStart, const char *NameEnd);
979  IdentifierInfo *get(llvm::StringRef Name) {
980    return get(Name.begin(), Name.end());
981  }
982
983  /// \brief Retrieve an iterator into the set of all identifiers
984  /// in all loaded AST files.
985  virtual IdentifierIterator *getIdentifiers() const;
986
987  /// \brief Load the contents of the global method pool for a given
988  /// selector.
989  ///
990  /// \returns a pair of Objective-C methods lists containing the
991  /// instance and factory methods, respectively, with this selector.
992  virtual std::pair<ObjCMethodList, ObjCMethodList>
993    ReadMethodPool(Selector Sel);
994
995  /// \brief Load a selector from disk, registering its ID if it exists.
996  void LoadSelector(Selector Sel);
997
998  void SetIdentifierInfo(unsigned ID, IdentifierInfo *II);
999  void SetGloballyVisibleDecls(IdentifierInfo *II,
1000                               const llvm::SmallVectorImpl<uint32_t> &DeclIDs,
1001                               bool Nonrecursive = false);
1002
1003  /// \brief Report a diagnostic.
1004  DiagnosticBuilder Diag(unsigned DiagID);
1005
1006  /// \brief Report a diagnostic.
1007  DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID);
1008
/// \brief Decode the given index into an IdentifierInfo.
///
/// GetIdentifierInfo() and GetIdentifier() in this class both forward to
/// this routine. NOTE(review): how Idx maps to an identifier is defined out
/// of line -- confirm the exact encoding against the implementation.
1009  IdentifierInfo *DecodeIdentifierInfo(unsigned Idx);
1010
1011  IdentifierInfo *GetIdentifierInfo(const RecordData &Record, unsigned &Idx) {
1012    return DecodeIdentifierInfo(Record[Idx++]);
1013  }
1014
1015  virtual IdentifierInfo *GetIdentifier(unsigned ID) {
1016    return DecodeIdentifierInfo(ID);
1017  }
1018
1019  /// \brief Read the source location entry with index ID.
1020  virtual void ReadSLocEntry(unsigned ID);
1021
/// \brief Decode the given index into a Selector.
///
/// GetSelector() in this class forwards a record entry to this routine.
/// NOTE(review): how Idx maps to a selector is defined out of line --
/// confirm the exact encoding against the implementation.
1022  Selector DecodeSelector(unsigned Idx);
1023
1024  virtual Selector GetExternalSelector(uint32_t ID);
1025  uint32_t GetNumExternalSelectors();
1026
1027  Selector GetSelector(const RecordData &Record, unsigned &Idx) {
1028    return DecodeSelector(Record[Idx++]);
1029  }
1030
1031  /// \brief Read a declaration name.
1032  DeclarationName ReadDeclarationName(const RecordData &Record, unsigned &Idx);
1033  void ReadDeclarationNameLoc(PerFileData &F,
1034                              DeclarationNameLoc &DNLoc, DeclarationName Name,
1035                              const RecordData &Record, unsigned &Idx);
1036  void ReadDeclarationNameInfo(PerFileData &F, DeclarationNameInfo &NameInfo,
1037                               const RecordData &Record, unsigned &Idx);
1038
1039  void ReadQualifierInfo(PerFileData &F, QualifierInfo &Info,
1040                         const RecordData &Record, unsigned &Idx);
1041
1042  NestedNameSpecifier *ReadNestedNameSpecifier(const RecordData &Record,
1043                                               unsigned &Idx);
1044
1045  /// \brief Read a template name.
1046  TemplateName ReadTemplateName(const RecordData &Record, unsigned &Idx);
1047
1048  /// \brief Read a template argument.
1049  TemplateArgument ReadTemplateArgument(PerFileData &F,
1050                                        const RecordData &Record,unsigned &Idx);
1051
1052  /// \brief Read a template parameter list.
1053  TemplateParameterList *ReadTemplateParameterList(PerFileData &F,
1054                                                   const RecordData &Record,
1055                                                   unsigned &Idx);
1056
1057  /// \brief Read a template argument array.
1058  void
1059  ReadTemplateArgumentList(llvm::SmallVector<TemplateArgument, 8> &TemplArgs,
1060                           PerFileData &F, const RecordData &Record,
1061                           unsigned &Idx);
1062
1063  /// \brief Read an UnresolvedSet structure.
1064  void ReadUnresolvedSet(UnresolvedSetImpl &Set,
1065                         const RecordData &Record, unsigned &Idx);
1066
1067  /// \brief Read a C++ base specifier.
1068  CXXBaseSpecifier ReadCXXBaseSpecifier(PerFileData &F,
1069                                        const RecordData &Record,unsigned &Idx);
1070
1071  /// \brief Read a CXXBaseOrMemberInitializer array.
1072  std::pair<CXXBaseOrMemberInitializer **, unsigned>
1073  ReadCXXBaseOrMemberInitializers(PerFileData &F,
1074                                  const RecordData &Record, unsigned &Idx);
1075
1076  /// \brief Read a source location from raw form.
1077  SourceLocation ReadSourceLocation(PerFileData &Module, unsigned Raw) {
1078    (void)Module; // No remapping yet
1079    return SourceLocation::getFromRawEncoding(Raw);
1080  }
1081
1082  /// \brief Read a source location.
1083  SourceLocation ReadSourceLocation(PerFileData &Module,
1084                                    const RecordData &Record, unsigned& Idx) {
1085    return ReadSourceLocation(Module, Record[Idx++]);
1086  }
1087
1088  /// \brief Read a source range.
1089  SourceRange ReadSourceRange(PerFileData &F,
1090                              const RecordData &Record, unsigned& Idx);
1091
1092  /// \brief Read an integral value
1093  llvm::APInt ReadAPInt(const RecordData &Record, unsigned &Idx);
1094
1095  /// \brief Read a signed integral value
1096  llvm::APSInt ReadAPSInt(const RecordData &Record, unsigned &Idx);
1097
1098  /// \brief Read a floating-point value
1099  llvm::APFloat ReadAPFloat(const RecordData &Record, unsigned &Idx);
1100
1101  /// \brief Read a string
1102  std::string ReadString(const RecordData &Record, unsigned &Idx);
1103
1104  CXXTemporary *ReadCXXTemporary(const RecordData &Record, unsigned &Idx);
1105
1106  /// \brief Reads attributes from the current stream position.
1107  void ReadAttributes(PerFileData &F, AttrVec &Attrs,
1108                      const RecordData &Record, unsigned &Idx);
1109
1110  /// \brief Reads a statement.
1111  Stmt *ReadStmt(PerFileData &F);
1112
1113  /// \brief Reads an expression.
1114  Expr *ReadExpr(PerFileData &F);
1115
1116  /// \brief Reads a sub-statement operand during statement reading.
1117  Stmt *ReadSubStmt() {
1118    assert(ReadingKind == Read_Stmt &&
1119           "Should be called only during statement reading!");
1120    // Subexpressions are stored from last to first, so the next Stmt we need
1121    // is at the back of the stack.
1122    assert(!StmtStack.empty() && "Read too many sub statements!");
1123    return StmtStack.pop_back_val();
1124  }
1125
1126  /// \brief Reads a sub-expression operand during statement reading.
1127  Expr *ReadSubExpr();
1128
1129  /// \brief Reads the macro record located at the given offset.
1130  void ReadMacroRecord(PerFileData &F, uint64_t Offset);
1131
1132  /// \brief Read the set of macros defined by this external macro source.
1133  virtual void ReadDefinedMacros();
1134
1135  /// \brief Retrieve the macro definition with the given ID.
1136  MacroDefinition *getMacroDefinition(serialization::MacroID ID);
1137
1138  /// \brief Retrieve the AST context that this AST reader supplements.
1139  ASTContext *getContext() { return Context; }
1140
1141  /// \brief Contains declarations that were loaded before we have
1142  /// access to a Sema object.
1143  llvm::SmallVector<NamedDecl *, 16> PreloadedDecls;
1144
1145  /// \brief Retrieve the semantic analysis object used to analyze the
1146  /// translation unit in which the precompiled header is being
1147  /// imported.
1148  Sema *getSema() { return SemaObj; }
1149
1150  /// \brief Retrieve the identifier table associated with the
1151  /// preprocessor.
1152  IdentifierTable &getIdentifierTable();
1153
1154  /// \brief Record that the given ID maps to the given switch-case
1155  /// statement.
1156  void RecordSwitchCaseID(SwitchCase *SC, unsigned ID);
1157
1158  /// \brief Retrieve the switch-case statement with the given ID.
1159  SwitchCase *getSwitchCaseWithID(unsigned ID);
1160
1161  /// \brief Record that the given label statement has been
1162  /// deserialized and has the given ID.
1163  void RecordLabelStmt(LabelStmt *S, unsigned ID);
1164
1165  /// \brief Set the label of the given statement to the label
1166  /// identified by ID.
1167  ///
1168  /// Depending on the order in which the label and other statements
1169  /// referencing that label occur, this operation may complete
1170  /// immediately (updating the statement) or it may queue the
1171  /// statement to be back-patched later.
1172  void SetLabelOf(GotoStmt *S, unsigned ID);
1173
1174  /// \brief Set the label of the given expression to the label
1175  /// identified by ID.
1176  ///
1177  /// Depending on the order in which the label and other statements
1178  /// referencing that label occur, this operation may complete
1179  /// immediately (updating the statement) or it may queue the
1180  /// statement to be back-patched later.
1181  void SetLabelOf(AddrLabelExpr *S, unsigned ID);
1182};
1183
1184/// \brief Helper class that saves the current stream position and
1185/// then restores it when destroyed.
1186struct SavedStreamPosition {
1187  explicit SavedStreamPosition(llvm::BitstreamCursor &Cursor)
1188  : Cursor(Cursor), Offset(Cursor.GetCurrentBitNo()) { }
1189
1190  ~SavedStreamPosition() {
1191    Cursor.JumpToBit(Offset);
1192  }
1193
1194private:
1195  llvm::BitstreamCursor &Cursor;
1196  uint64_t Offset;
1197};
1198
1199inline void PCHValidator::Error(const char *Msg) {
1200  Reader.Error(Msg);
1201}
1202
1203} // end namespace clang
1204
1205#endif
1206