// ASTWriter.h revision f73c93fea0d4b447585bc7459499ba6b822e045c
//===--- ASTWriter.h - AST File Writer --------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file defines the ASTWriter class, which writes an AST file
//  containing a serialized representation of a translation unit.
//
//===----------------------------------------------------------------------===//
14#ifndef LLVM_CLANG_FRONTEND_AST_WRITER_H
15#define LLVM_CLANG_FRONTEND_AST_WRITER_H
16
17#include "clang/AST/Decl.h"
18#include "clang/AST/DeclarationName.h"
19#include "clang/AST/TemplateBase.h"
20#include "clang/Serialization/ASTBitCodes.h"
21#include "clang/Serialization/ASTDeserializationListener.h"
22#include "clang/Sema/SemaConsumer.h"
23#include "llvm/ADT/SmallPtrSet.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/Bitcode/BitstreamWriter.h"
26#include <map>
27#include <queue>
28#include <vector>
29
// Forward declarations of LLVM types that this header only refers to by
// reference or pointer.
namespace llvm {
  class APFloat;
  class APInt;
  class BitstreamWriter;
}
35
36namespace clang {
37
// Forward declarations of types this header only uses through pointers,
// references, or function signatures (kept in alphabetical order).
class ASTContext;
class ASTReader;
class CXXBaseOrMemberInitializer;
class CXXBaseSpecifier;
class LabelStmt;
class MacroDefinition;
class MemorizeStatCalls;
class NestedNameSpecifier;
class Preprocessor;
class Sema;
class SourceManager;
class SwitchCase;
class TargetInfo;
51
52/// \brief Writes an AST file containing the contents of a translation unit.
53///
54/// The ASTWriter class produces a bitstream containing the serialized
55/// representation of a given abstract syntax tree and its supporting
56/// data structures. This bitstream can be de-serialized via an
57/// instance of the ASTReader class.
58class ASTWriter : public ASTDeserializationListener {
59public:
60  typedef llvm::SmallVector<uint64_t, 64> RecordData;
61
62  friend class ASTDeclWriter;
63private:
64  /// \brief The bitstream writer used to emit this precompiled header.
65  llvm::BitstreamWriter &Stream;
66
67  /// \brief The reader of existing AST files, if we're chaining.
68  ASTReader *Chain;
69
70  /// \brief Stores a declaration or a type to be written to the AST file.
71  class DeclOrType {
72  public:
73    DeclOrType(Decl *D) : Stored(D), IsType(false) { }
74    DeclOrType(QualType T) : Stored(T.getAsOpaquePtr()), IsType(true) { }
75
76    bool isType() const { return IsType; }
77    bool isDecl() const { return !IsType; }
78
79    QualType getType() const {
80      assert(isType() && "Not a type!");
81      return QualType::getFromOpaquePtr(Stored);
82    }
83
84    Decl *getDecl() const {
85      assert(isDecl() && "Not a decl!");
86      return static_cast<Decl *>(Stored);
87    }
88
89  private:
90    void *Stored;
91    bool IsType;
92  };
93
94  /// \brief The declarations and types to emit.
95  std::queue<DeclOrType> DeclTypesToEmit;
96
97  /// \brief The first ID number we can use for our own declarations.
98  serialization::DeclID FirstDeclID;
99
100  /// \brief The decl ID that will be assigned to the next new decl.
101  serialization::DeclID NextDeclID;
102
103  /// \brief Map that provides the ID numbers of each declaration within
104  /// the output stream, as well as those deserialized from a chained PCH.
105  ///
106  /// The ID numbers of declarations are consecutive (in order of
107  /// discovery) and start at 2. 1 is reserved for the translation
108  /// unit, while 0 is reserved for NULL.
109  llvm::DenseMap<const Decl *, serialization::DeclID> DeclIDs;
110
111  /// \brief Offset of each declaration in the bitstream, indexed by
112  /// the declaration's ID.
113  std::vector<uint32_t> DeclOffsets;
114
115  /// \brief The first ID number we can use for our own types.
116  serialization::TypeID FirstTypeID;
117
118  /// \brief The type ID that will be assigned to the next new type.
119  serialization::TypeID NextTypeID;
120
121  /// \brief Map that provides the ID numbers of each type within the
122  /// output stream, plus those deserialized from a chained PCH.
123  ///
124  /// The ID numbers of types are consecutive (in order of discovery)
125  /// and start at 1. 0 is reserved for NULL. When types are actually
126  /// stored in the stream, the ID number is shifted by 2 bits to
127  /// allow for the const/volatile qualifiers.
128  ///
129  /// Keys in the map never have const/volatile qualifiers.
130  serialization::TypeIdxMap TypeIdxs;
131
132  /// \brief Offset of each type in the bitstream, indexed by
133  /// the type's ID.
134  std::vector<uint32_t> TypeOffsets;
135
136  /// \brief The first ID number we can use for our own identifiers.
137  serialization::IdentID FirstIdentID;
138
139  /// \brief The identifier ID that will be assigned to the next new identifier.
140  serialization::IdentID NextIdentID;
141
142  /// \brief Map that provides the ID numbers of each identifier in
143  /// the output stream.
144  ///
145  /// The ID numbers for identifiers are consecutive (in order of
146  /// discovery), starting at 1. An ID of zero refers to a NULL
147  /// IdentifierInfo.
148  llvm::DenseMap<const IdentifierInfo *, serialization::IdentID> IdentifierIDs;
149
150  /// \brief Offsets of each of the identifier IDs into the identifier
151  /// table.
152  std::vector<uint32_t> IdentifierOffsets;
153
154  /// \brief The first ID number we can use for our own selectors.
155  serialization::SelectorID FirstSelectorID;
156
157  /// \brief The selector ID that will be assigned to the next new identifier.
158  serialization::SelectorID NextSelectorID;
159
160  /// \brief Map that provides the ID numbers of each Selector.
161  llvm::DenseMap<Selector, serialization::SelectorID> SelectorIDs;
162
163  /// \brief Offset of each selector within the method pool/selector
164  /// table, indexed by the Selector ID (-1).
165  std::vector<uint32_t> SelectorOffsets;
166
167  /// \brief Offsets of each of the macro identifiers into the
168  /// bitstream.
169  ///
170  /// For each identifier that is associated with a macro, this map
171  /// provides the offset into the bitstream where that macro is
172  /// defined.
173  llvm::DenseMap<const IdentifierInfo *, uint64_t> MacroOffsets;
174
175  /// \brief Mapping from macro definitions (as they occur in the preprocessing
176  /// record) to the macro IDs.
177  llvm::DenseMap<const MacroDefinition *, serialization::MacroID>
178      MacroDefinitions;
179
180  /// \brief Mapping from the macro definition indices in \c MacroDefinitions
181  /// to the corresponding offsets within the preprocessor block.
182  std::vector<uint32_t> MacroDefinitionOffsets;
183
184  typedef llvm::DenseMap<Decl *, Decl *> FirstLatestDeclMap;
185  /// \brief Map of first declarations from a chained PCH that point to the
186  /// most recent declarations in another PCH.
187  FirstLatestDeclMap FirstLatestDecls;
188
189  /// \brief Declarations encountered that might be external
190  /// definitions.
191  ///
192  /// We keep track of external definitions (as well as tentative
193  /// definitions) as we are emitting declarations to the AST
194  /// file. The AST file contains a separate record for these external
195  /// definitions, which are provided to the AST consumer by the AST
196  /// reader. This is behavior is required to properly cope with,
197  /// e.g., tentative variable definitions that occur within
198  /// headers. The declarations themselves are stored as declaration
199  /// IDs, since they will be written out to an EXTERNAL_DEFINITIONS
200  /// record.
201  llvm::SmallVector<uint64_t, 16> ExternalDefinitions;
202
203  /// \brief Namespaces that have received extensions since their serialized
204  /// form.
205  ///
206  /// Basically, when we're chaining and encountering a namespace, we check if
207  /// its primary namespace comes from the chain. If it does, we add the primary
208  /// to this set, so that we can write out lexical content updates for it.
209  llvm::SmallPtrSet<const NamespaceDecl *, 16> UpdatedNamespaces;
210
211  /// \brief Decls that have been replaced in the current dependent AST file.
212  ///
213  /// When a decl changes fundamentally after being deserialized (this shouldn't
214  /// happen, but the ObjC AST nodes are designed this way), it will be
215  /// serialized again. In this case, it is registered here, so that the reader
216  /// knows to read the updated version.
217  llvm::SmallVector<std::pair<serialization::DeclID, uint64_t>, 16>
218      ReplacedDecls;
219
220  typedef llvm::SmallVector<serialization::DeclID, 4>
221      AdditionalTemplateSpecializationsList;
222  typedef llvm::DenseMap<serialization::DeclID,
223                         AdditionalTemplateSpecializationsList>
224      AdditionalTemplateSpecializationsMap;
225
226  /// \brief Additional specializations (including partial) of templates that
227  /// were introduced after the template was serialized.
228  AdditionalTemplateSpecializationsMap AdditionalTemplateSpecializations;
229
230  /// \brief Statements that we've encountered while serializing a
231  /// declaration or type.
232  llvm::SmallVector<Stmt *, 16> StmtsToEmit;
233
234  /// \brief Statements collection to use for ASTWriter::AddStmt().
235  /// It will point to StmtsToEmit unless it is overriden.
236  llvm::SmallVector<Stmt *, 16> *CollectedStmts;
237
238  /// \brief Mapping from SwitchCase statements to IDs.
239  std::map<SwitchCase *, unsigned> SwitchCaseIDs;
240
241  /// \brief Mapping from LabelStmt statements to IDs.
242  std::map<LabelStmt *, unsigned> LabelIDs;
243
244  /// \brief The number of statements written to the AST file.
245  unsigned NumStatements;
246
247  /// \brief The number of macros written to the AST file.
248  unsigned NumMacros;
249
250  /// \brief The number of lexical declcontexts written to the AST
251  /// file.
252  unsigned NumLexicalDeclContexts;
253
254  /// \brief The number of visible declcontexts written to the AST
255  /// file.
256  unsigned NumVisibleDeclContexts;
257
258  /// \brief Write the given subexpression to the bitstream.
259  void WriteSubStmt(Stmt *S);
260
261  void WriteBlockInfoBlock();
262  void WriteMetadata(ASTContext &Context, const char *isysroot);
263  void WriteLanguageOptions(const LangOptions &LangOpts);
264  void WriteStatCache(MemorizeStatCalls &StatCalls);
265  void WriteSourceManagerBlock(SourceManager &SourceMgr,
266                               const Preprocessor &PP,
267                               const char* isysroot);
268  void WritePreprocessor(const Preprocessor &PP);
269  void WriteType(QualType T);
270  uint64_t WriteDeclContextLexicalBlock(ASTContext &Context, DeclContext *DC);
271  uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC);
272  void WriteTypeDeclOffsets();
273  void WriteSelectors(Sema &SemaRef);
274  void WriteReferencedSelectorsPool(Sema &SemaRef);
275  void WriteIdentifierTable(Preprocessor &PP);
276  void WriteAttributeRecord(const AttrVec &Attrs);
277  void WriteDeclUpdateBlock();
278  void WriteDeclContextVisibleUpdate(const DeclContext *DC);
279  void WriteAdditionalTemplateSpecializations();
280
281  unsigned ParmVarDeclAbbrev;
282  unsigned DeclContextLexicalAbbrev;
283  unsigned DeclContextVisibleLookupAbbrev;
284  unsigned UpdateVisibleAbbrev;
285  void WriteDeclsBlockAbbrevs();
286  void WriteDecl(ASTContext &Context, Decl *D);
287
288  void WriteASTCore(Sema &SemaRef, MemorizeStatCalls *StatCalls,
289                    const char* isysroot);
290  void WriteASTChain(Sema &SemaRef, MemorizeStatCalls *StatCalls,
291                     const char* isysroot);
292
293public:
294  /// \brief Create a new precompiled header writer that outputs to
295  /// the given bitstream.
296  ASTWriter(llvm::BitstreamWriter &Stream);
297
298  /// \brief Write a precompiled header for the given semantic analysis.
299  ///
300  /// \param SemaRef a reference to the semantic analysis object that processed
301  /// the AST to be written into the precompiled header.
302  ///
303  /// \param StatCalls the object that cached all of the stat() calls made while
304  /// searching for source files and headers.
305  ///
306  /// \param isysroot if non-NULL, write a relocatable PCH file whose headers
307  /// are relative to the given system root.
308  ///
309  /// \param PPRec Record of the preprocessing actions that occurred while
310  /// preprocessing this file, e.g., macro instantiations
311  void WriteAST(Sema &SemaRef, MemorizeStatCalls *StatCalls,
312                const char* isysroot);
313
314  /// \brief Emit a source location.
315  void AddSourceLocation(SourceLocation Loc, RecordData &Record);
316
317  /// \brief Emit a source range.
318  void AddSourceRange(SourceRange Range, RecordData &Record);
319
320  /// \brief Emit an integral value.
321  void AddAPInt(const llvm::APInt &Value, RecordData &Record);
322
323  /// \brief Emit a signed integral value.
324  void AddAPSInt(const llvm::APSInt &Value, RecordData &Record);
325
326  /// \brief Emit a floating-point value.
327  void AddAPFloat(const llvm::APFloat &Value, RecordData &Record);
328
329  /// \brief Emit a reference to an identifier.
330  void AddIdentifierRef(const IdentifierInfo *II, RecordData &Record);
331
332  /// \brief Emit a Selector (which is a smart pointer reference).
333  void AddSelectorRef(Selector, RecordData &Record);
334
335  /// \brief Emit a CXXTemporary.
336  void AddCXXTemporary(const CXXTemporary *Temp, RecordData &Record);
337
338  /// \brief Get the unique number used to refer to the given selector.
339  serialization::SelectorID getSelectorRef(Selector Sel);
340
341  /// \brief Get the unique number used to refer to the given identifier.
342  serialization::IdentID getIdentifierRef(const IdentifierInfo *II);
343
344  /// \brief Retrieve the offset of the macro definition for the given
345  /// identifier.
346  ///
347  /// The identifier must refer to a macro.
348  uint64_t getMacroOffset(const IdentifierInfo *II) {
349    assert(MacroOffsets.find(II) != MacroOffsets.end() &&
350           "Identifier does not name a macro");
351    return MacroOffsets[II];
352  }
353
354  /// \brief Retrieve the ID number corresponding to the given macro
355  /// definition.
356  serialization::MacroID getMacroDefinitionID(MacroDefinition *MD);
357
358  /// \brief Emit a reference to a type.
359  void AddTypeRef(QualType T, RecordData &Record);
360
361  /// \brief Force a type to be emitted and get its ID.
362  serialization::TypeID GetOrCreateTypeID(QualType T);
363
364  /// \brief Determine the type ID of an already-emitted type.
365  serialization::TypeID getTypeID(QualType T) const;
366
367  /// \brief Force a type to be emitted and get its index.
368  serialization::TypeIdx GetOrCreateTypeIdx(QualType T);
369
370  /// \brief Determine the type index of an already-emitted type.
371  serialization::TypeIdx getTypeIdx(QualType T) const;
372
373  /// \brief Emits a reference to a declarator info.
374  void AddTypeSourceInfo(TypeSourceInfo *TInfo, RecordData &Record);
375
376  /// \brief Emits a template argument location info.
377  void AddTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind,
378                                  const TemplateArgumentLocInfo &Arg,
379                                  RecordData &Record);
380
381  /// \brief Emits a template argument location.
382  void AddTemplateArgumentLoc(const TemplateArgumentLoc &Arg,
383                              RecordData &Record);
384
385  /// \brief Emit a reference to a declaration.
386  void AddDeclRef(const Decl *D, RecordData &Record);
387
388  /// \brief Force a declaration to be emitted and get its ID.
389  serialization::DeclID GetDeclRef(const Decl *D);
390
391  /// \brief Determine the declaration ID of an already-emitted
392  /// declaration.
393  serialization::DeclID getDeclID(const Decl *D);
394
395  /// \brief Emit a declaration name.
396  void AddDeclarationName(DeclarationName Name, RecordData &Record);
397
398  /// \brief Emit a nested name specifier.
399  void AddNestedNameSpecifier(NestedNameSpecifier *NNS, RecordData &Record);
400
401  /// \brief Emit a template name.
402  void AddTemplateName(TemplateName Name, RecordData &Record);
403
404  /// \brief Emit a template argument.
405  void AddTemplateArgument(const TemplateArgument &Arg, RecordData &Record);
406
407  /// \brief Emit a template parameter list.
408  void AddTemplateParameterList(const TemplateParameterList *TemplateParams,
409                                RecordData &Record);
410
411  /// \brief Emit a template argument list.
412  void AddTemplateArgumentList(const TemplateArgumentList *TemplateArgs,
413                                RecordData &Record);
414
415  /// \brief Emit a UnresolvedSet structure.
416  void AddUnresolvedSet(const UnresolvedSetImpl &Set, RecordData &Record);
417
418  /// \brief Emit a C++ base specifier.
419  void AddCXXBaseSpecifier(const CXXBaseSpecifier &Base, RecordData &Record);
420
421  /// \brief Emit a CXXBaseOrMemberInitializer array.
422  void AddCXXBaseOrMemberInitializers(
423                        const CXXBaseOrMemberInitializer * const *BaseOrMembers,
424                        unsigned NumBaseOrMembers, RecordData &Record);
425
426  /// \brief Add a string to the given record.
427  void AddString(llvm::StringRef Str, RecordData &Record);
428
429  /// \brief Mark a namespace as needing an update.
430  void AddUpdatedNamespace(const NamespaceDecl *NS) {
431    UpdatedNamespaces.insert(NS);
432  }
433
434  /// \brief Record a template specialization or partial specialization of
435  /// a template from a previous PCH file.
436  void AddAdditionalTemplateSpecialization(serialization::DeclID Templ,
437                                           serialization::DeclID Spec) {
438    AdditionalTemplateSpecializations[Templ].push_back(Spec);
439  }
440
441  /// \brief Note that the identifier II occurs at the given offset
442  /// within the identifier table.
443  void SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset);
444
445  /// \brief Note that the selector Sel occurs at the given offset
446  /// within the method pool/selector table.
447  void SetSelectorOffset(Selector Sel, uint32_t Offset);
448
449  /// \brief Add the given statement or expression to the queue of
450  /// statements to emit.
451  ///
452  /// This routine should be used when emitting types and declarations
453  /// that have expressions as part of their formulation. Once the
454  /// type or declaration has been written, call FlushStmts() to write
455  /// the corresponding statements just after the type or
456  /// declaration.
457  void AddStmt(Stmt *S) {
458      CollectedStmts->push_back(S);
459  }
460
461  /// \brief Flush all of the statements and expressions that have
462  /// been added to the queue via AddStmt().
463  void FlushStmts();
464
465  /// \brief Record an ID for the given switch-case statement.
466  unsigned RecordSwitchCaseID(SwitchCase *S);
467
468  /// \brief Retrieve the ID for the given switch-case statement.
469  unsigned getSwitchCaseID(SwitchCase *S);
470
471  /// \brief Retrieve the ID for the given label statement, which may
472  /// or may not have been emitted yet.
473  unsigned GetLabelID(LabelStmt *S);
474
475  unsigned getParmVarDeclAbbrev() const { return ParmVarDeclAbbrev; }
476
477  bool hasChain() const { return Chain; }
478
479  // ASTDeserializationListener implementation
480  void SetReader(ASTReader *Reader);
481  void IdentifierRead(serialization::IdentID ID, IdentifierInfo *II);
482  void TypeRead(serialization::TypeIdx Idx, QualType T);
483  void DeclRead(serialization::DeclID ID, const Decl *D);
484  void SelectorRead(serialization::SelectorID iD, Selector Sel);
485};
486
487/// \brief AST and semantic-analysis consumer that generates a
488/// precompiled header from the parsed source code.
489class PCHGenerator : public SemaConsumer {
490  const Preprocessor &PP;
491  const char *isysroot;
492  llvm::raw_ostream *Out;
493  Sema *SemaPtr;
494  MemorizeStatCalls *StatCalls; // owned by the FileManager
495  std::vector<unsigned char> Buffer;
496  llvm::BitstreamWriter Stream;
497  ASTWriter Writer;
498
499protected:
500  ASTWriter &getWriter() { return Writer; }
501  const ASTWriter &getWriter() const { return Writer; }
502
503public:
504  PCHGenerator(const Preprocessor &PP, bool Chaining,
505               const char *isysroot, llvm::raw_ostream *Out);
506  virtual void InitializeSema(Sema &S) { SemaPtr = &S; }
507  virtual void HandleTranslationUnit(ASTContext &Ctx);
508  virtual ASTDeserializationListener *GetASTDeserializationListener();
509};
510
511} // end namespace clang
512
513#endif
514