// ASTWriter.h revision f73c93fea0d4b447585bc7459499ba6b822e045c
1//===--- ASTWriter.h - AST File Writer --------------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the ASTWriter class, which writes an AST file 11// containing a serialized representation of a translation unit. 12// 13//===----------------------------------------------------------------------===// 14#ifndef LLVM_CLANG_FRONTEND_AST_WRITER_H 15#define LLVM_CLANG_FRONTEND_AST_WRITER_H 16 17#include "clang/AST/Decl.h" 18#include "clang/AST/DeclarationName.h" 19#include "clang/AST/TemplateBase.h" 20#include "clang/Serialization/ASTBitCodes.h" 21#include "clang/Serialization/ASTDeserializationListener.h" 22#include "clang/Sema/SemaConsumer.h" 23#include "llvm/ADT/SmallPtrSet.h" 24#include "llvm/ADT/SmallVector.h" 25#include "llvm/Bitcode/BitstreamWriter.h" 26#include <map> 27#include <queue> 28#include <vector> 29 30namespace llvm { 31 class APFloat; 32 class APInt; 33 class BitstreamWriter; 34} 35 36namespace clang { 37 38class ASTContext; 39class NestedNameSpecifier; 40class CXXBaseSpecifier; 41class CXXBaseOrMemberInitializer; 42class LabelStmt; 43class MacroDefinition; 44class MemorizeStatCalls; 45class ASTReader; 46class Preprocessor; 47class Sema; 48class SourceManager; 49class SwitchCase; 50class TargetInfo; 51 52/// \brief Writes an AST file containing the contents of a translation unit. 53/// 54/// The ASTWriter class produces a bitstream containing the serialized 55/// representation of a given abstract syntax tree and its supporting 56/// data structures. This bitstream can be de-serialized via an 57/// instance of the ASTReader class. 
58class ASTWriter : public ASTDeserializationListener { 59public: 60 typedef llvm::SmallVector<uint64_t, 64> RecordData; 61 62 friend class ASTDeclWriter; 63private: 64 /// \brief The bitstream writer used to emit this precompiled header. 65 llvm::BitstreamWriter &Stream; 66 67 /// \brief The reader of existing AST files, if we're chaining. 68 ASTReader *Chain; 69 70 /// \brief Stores a declaration or a type to be written to the AST file. 71 class DeclOrType { 72 public: 73 DeclOrType(Decl *D) : Stored(D), IsType(false) { } 74 DeclOrType(QualType T) : Stored(T.getAsOpaquePtr()), IsType(true) { } 75 76 bool isType() const { return IsType; } 77 bool isDecl() const { return !IsType; } 78 79 QualType getType() const { 80 assert(isType() && "Not a type!"); 81 return QualType::getFromOpaquePtr(Stored); 82 } 83 84 Decl *getDecl() const { 85 assert(isDecl() && "Not a decl!"); 86 return static_cast<Decl *>(Stored); 87 } 88 89 private: 90 void *Stored; 91 bool IsType; 92 }; 93 94 /// \brief The declarations and types to emit. 95 std::queue<DeclOrType> DeclTypesToEmit; 96 97 /// \brief The first ID number we can use for our own declarations. 98 serialization::DeclID FirstDeclID; 99 100 /// \brief The decl ID that will be assigned to the next new decl. 101 serialization::DeclID NextDeclID; 102 103 /// \brief Map that provides the ID numbers of each declaration within 104 /// the output stream, as well as those deserialized from a chained PCH. 105 /// 106 /// The ID numbers of declarations are consecutive (in order of 107 /// discovery) and start at 2. 1 is reserved for the translation 108 /// unit, while 0 is reserved for NULL. 109 llvm::DenseMap<const Decl *, serialization::DeclID> DeclIDs; 110 111 /// \brief Offset of each declaration in the bitstream, indexed by 112 /// the declaration's ID. 113 std::vector<uint32_t> DeclOffsets; 114 115 /// \brief The first ID number we can use for our own types. 
116 serialization::TypeID FirstTypeID; 117 118 /// \brief The type ID that will be assigned to the next new type. 119 serialization::TypeID NextTypeID; 120 121 /// \brief Map that provides the ID numbers of each type within the 122 /// output stream, plus those deserialized from a chained PCH. 123 /// 124 /// The ID numbers of types are consecutive (in order of discovery) 125 /// and start at 1. 0 is reserved for NULL. When types are actually 126 /// stored in the stream, the ID number is shifted by 2 bits to 127 /// allow for the const/volatile qualifiers. 128 /// 129 /// Keys in the map never have const/volatile qualifiers. 130 serialization::TypeIdxMap TypeIdxs; 131 132 /// \brief Offset of each type in the bitstream, indexed by 133 /// the type's ID. 134 std::vector<uint32_t> TypeOffsets; 135 136 /// \brief The first ID number we can use for our own identifiers. 137 serialization::IdentID FirstIdentID; 138 139 /// \brief The identifier ID that will be assigned to the next new identifier. 140 serialization::IdentID NextIdentID; 141 142 /// \brief Map that provides the ID numbers of each identifier in 143 /// the output stream. 144 /// 145 /// The ID numbers for identifiers are consecutive (in order of 146 /// discovery), starting at 1. An ID of zero refers to a NULL 147 /// IdentifierInfo. 148 llvm::DenseMap<const IdentifierInfo *, serialization::IdentID> IdentifierIDs; 149 150 /// \brief Offsets of each of the identifier IDs into the identifier 151 /// table. 152 std::vector<uint32_t> IdentifierOffsets; 153 154 /// \brief The first ID number we can use for our own selectors. 155 serialization::SelectorID FirstSelectorID; 156 157 /// \brief The selector ID that will be assigned to the next new identifier. 158 serialization::SelectorID NextSelectorID; 159 160 /// \brief Map that provides the ID numbers of each Selector. 
161 llvm::DenseMap<Selector, serialization::SelectorID> SelectorIDs; 162 163 /// \brief Offset of each selector within the method pool/selector 164 /// table, indexed by the Selector ID (-1). 165 std::vector<uint32_t> SelectorOffsets; 166 167 /// \brief Offsets of each of the macro identifiers into the 168 /// bitstream. 169 /// 170 /// For each identifier that is associated with a macro, this map 171 /// provides the offset into the bitstream where that macro is 172 /// defined. 173 llvm::DenseMap<const IdentifierInfo *, uint64_t> MacroOffsets; 174 175 /// \brief Mapping from macro definitions (as they occur in the preprocessing 176 /// record) to the macro IDs. 177 llvm::DenseMap<const MacroDefinition *, serialization::MacroID> 178 MacroDefinitions; 179 180 /// \brief Mapping from the macro definition indices in \c MacroDefinitions 181 /// to the corresponding offsets within the preprocessor block. 182 std::vector<uint32_t> MacroDefinitionOffsets; 183 184 typedef llvm::DenseMap<Decl *, Decl *> FirstLatestDeclMap; 185 /// \brief Map of first declarations from a chained PCH that point to the 186 /// most recent declarations in another PCH. 187 FirstLatestDeclMap FirstLatestDecls; 188 189 /// \brief Declarations encountered that might be external 190 /// definitions. 191 /// 192 /// We keep track of external definitions (as well as tentative 193 /// definitions) as we are emitting declarations to the AST 194 /// file. The AST file contains a separate record for these external 195 /// definitions, which are provided to the AST consumer by the AST 196 /// reader. This is behavior is required to properly cope with, 197 /// e.g., tentative variable definitions that occur within 198 /// headers. The declarations themselves are stored as declaration 199 /// IDs, since they will be written out to an EXTERNAL_DEFINITIONS 200 /// record. 
201 llvm::SmallVector<uint64_t, 16> ExternalDefinitions; 202 203 /// \brief Namespaces that have received extensions since their serialized 204 /// form. 205 /// 206 /// Basically, when we're chaining and encountering a namespace, we check if 207 /// its primary namespace comes from the chain. If it does, we add the primary 208 /// to this set, so that we can write out lexical content updates for it. 209 llvm::SmallPtrSet<const NamespaceDecl *, 16> UpdatedNamespaces; 210 211 /// \brief Decls that have been replaced in the current dependent AST file. 212 /// 213 /// When a decl changes fundamentally after being deserialized (this shouldn't 214 /// happen, but the ObjC AST nodes are designed this way), it will be 215 /// serialized again. In this case, it is registered here, so that the reader 216 /// knows to read the updated version. 217 llvm::SmallVector<std::pair<serialization::DeclID, uint64_t>, 16> 218 ReplacedDecls; 219 220 typedef llvm::SmallVector<serialization::DeclID, 4> 221 AdditionalTemplateSpecializationsList; 222 typedef llvm::DenseMap<serialization::DeclID, 223 AdditionalTemplateSpecializationsList> 224 AdditionalTemplateSpecializationsMap; 225 226 /// \brief Additional specializations (including partial) of templates that 227 /// were introduced after the template was serialized. 228 AdditionalTemplateSpecializationsMap AdditionalTemplateSpecializations; 229 230 /// \brief Statements that we've encountered while serializing a 231 /// declaration or type. 232 llvm::SmallVector<Stmt *, 16> StmtsToEmit; 233 234 /// \brief Statements collection to use for ASTWriter::AddStmt(). 235 /// It will point to StmtsToEmit unless it is overriden. 236 llvm::SmallVector<Stmt *, 16> *CollectedStmts; 237 238 /// \brief Mapping from SwitchCase statements to IDs. 239 std::map<SwitchCase *, unsigned> SwitchCaseIDs; 240 241 /// \brief Mapping from LabelStmt statements to IDs. 
242 std::map<LabelStmt *, unsigned> LabelIDs; 243 244 /// \brief The number of statements written to the AST file. 245 unsigned NumStatements; 246 247 /// \brief The number of macros written to the AST file. 248 unsigned NumMacros; 249 250 /// \brief The number of lexical declcontexts written to the AST 251 /// file. 252 unsigned NumLexicalDeclContexts; 253 254 /// \brief The number of visible declcontexts written to the AST 255 /// file. 256 unsigned NumVisibleDeclContexts; 257 258 /// \brief Write the given subexpression to the bitstream. 259 void WriteSubStmt(Stmt *S); 260 261 void WriteBlockInfoBlock(); 262 void WriteMetadata(ASTContext &Context, const char *isysroot); 263 void WriteLanguageOptions(const LangOptions &LangOpts); 264 void WriteStatCache(MemorizeStatCalls &StatCalls); 265 void WriteSourceManagerBlock(SourceManager &SourceMgr, 266 const Preprocessor &PP, 267 const char* isysroot); 268 void WritePreprocessor(const Preprocessor &PP); 269 void WriteType(QualType T); 270 uint64_t WriteDeclContextLexicalBlock(ASTContext &Context, DeclContext *DC); 271 uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC); 272 void WriteTypeDeclOffsets(); 273 void WriteSelectors(Sema &SemaRef); 274 void WriteReferencedSelectorsPool(Sema &SemaRef); 275 void WriteIdentifierTable(Preprocessor &PP); 276 void WriteAttributeRecord(const AttrVec &Attrs); 277 void WriteDeclUpdateBlock(); 278 void WriteDeclContextVisibleUpdate(const DeclContext *DC); 279 void WriteAdditionalTemplateSpecializations(); 280 281 unsigned ParmVarDeclAbbrev; 282 unsigned DeclContextLexicalAbbrev; 283 unsigned DeclContextVisibleLookupAbbrev; 284 unsigned UpdateVisibleAbbrev; 285 void WriteDeclsBlockAbbrevs(); 286 void WriteDecl(ASTContext &Context, Decl *D); 287 288 void WriteASTCore(Sema &SemaRef, MemorizeStatCalls *StatCalls, 289 const char* isysroot); 290 void WriteASTChain(Sema &SemaRef, MemorizeStatCalls *StatCalls, 291 const char* isysroot); 292 293public: 294 /// \brief 
Create a new precompiled header writer that outputs to 295 /// the given bitstream. 296 ASTWriter(llvm::BitstreamWriter &Stream); 297 298 /// \brief Write a precompiled header for the given semantic analysis. 299 /// 300 /// \param SemaRef a reference to the semantic analysis object that processed 301 /// the AST to be written into the precompiled header. 302 /// 303 /// \param StatCalls the object that cached all of the stat() calls made while 304 /// searching for source files and headers. 305 /// 306 /// \param isysroot if non-NULL, write a relocatable PCH file whose headers 307 /// are relative to the given system root. 308 /// 309 /// \param PPRec Record of the preprocessing actions that occurred while 310 /// preprocessing this file, e.g., macro instantiations 311 void WriteAST(Sema &SemaRef, MemorizeStatCalls *StatCalls, 312 const char* isysroot); 313 314 /// \brief Emit a source location. 315 void AddSourceLocation(SourceLocation Loc, RecordData &Record); 316 317 /// \brief Emit a source range. 318 void AddSourceRange(SourceRange Range, RecordData &Record); 319 320 /// \brief Emit an integral value. 321 void AddAPInt(const llvm::APInt &Value, RecordData &Record); 322 323 /// \brief Emit a signed integral value. 324 void AddAPSInt(const llvm::APSInt &Value, RecordData &Record); 325 326 /// \brief Emit a floating-point value. 327 void AddAPFloat(const llvm::APFloat &Value, RecordData &Record); 328 329 /// \brief Emit a reference to an identifier. 330 void AddIdentifierRef(const IdentifierInfo *II, RecordData &Record); 331 332 /// \brief Emit a Selector (which is a smart pointer reference). 333 void AddSelectorRef(Selector, RecordData &Record); 334 335 /// \brief Emit a CXXTemporary. 336 void AddCXXTemporary(const CXXTemporary *Temp, RecordData &Record); 337 338 /// \brief Get the unique number used to refer to the given selector. 
339 serialization::SelectorID getSelectorRef(Selector Sel); 340 341 /// \brief Get the unique number used to refer to the given identifier. 342 serialization::IdentID getIdentifierRef(const IdentifierInfo *II); 343 344 /// \brief Retrieve the offset of the macro definition for the given 345 /// identifier. 346 /// 347 /// The identifier must refer to a macro. 348 uint64_t getMacroOffset(const IdentifierInfo *II) { 349 assert(MacroOffsets.find(II) != MacroOffsets.end() && 350 "Identifier does not name a macro"); 351 return MacroOffsets[II]; 352 } 353 354 /// \brief Retrieve the ID number corresponding to the given macro 355 /// definition. 356 serialization::MacroID getMacroDefinitionID(MacroDefinition *MD); 357 358 /// \brief Emit a reference to a type. 359 void AddTypeRef(QualType T, RecordData &Record); 360 361 /// \brief Force a type to be emitted and get its ID. 362 serialization::TypeID GetOrCreateTypeID(QualType T); 363 364 /// \brief Determine the type ID of an already-emitted type. 365 serialization::TypeID getTypeID(QualType T) const; 366 367 /// \brief Force a type to be emitted and get its index. 368 serialization::TypeIdx GetOrCreateTypeIdx(QualType T); 369 370 /// \brief Determine the type index of an already-emitted type. 371 serialization::TypeIdx getTypeIdx(QualType T) const; 372 373 /// \brief Emits a reference to a declarator info. 374 void AddTypeSourceInfo(TypeSourceInfo *TInfo, RecordData &Record); 375 376 /// \brief Emits a template argument location info. 377 void AddTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind, 378 const TemplateArgumentLocInfo &Arg, 379 RecordData &Record); 380 381 /// \brief Emits a template argument location. 382 void AddTemplateArgumentLoc(const TemplateArgumentLoc &Arg, 383 RecordData &Record); 384 385 /// \brief Emit a reference to a declaration. 386 void AddDeclRef(const Decl *D, RecordData &Record); 387 388 /// \brief Force a declaration to be emitted and get its ID. 
389 serialization::DeclID GetDeclRef(const Decl *D); 390 391 /// \brief Determine the declaration ID of an already-emitted 392 /// declaration. 393 serialization::DeclID getDeclID(const Decl *D); 394 395 /// \brief Emit a declaration name. 396 void AddDeclarationName(DeclarationName Name, RecordData &Record); 397 398 /// \brief Emit a nested name specifier. 399 void AddNestedNameSpecifier(NestedNameSpecifier *NNS, RecordData &Record); 400 401 /// \brief Emit a template name. 402 void AddTemplateName(TemplateName Name, RecordData &Record); 403 404 /// \brief Emit a template argument. 405 void AddTemplateArgument(const TemplateArgument &Arg, RecordData &Record); 406 407 /// \brief Emit a template parameter list. 408 void AddTemplateParameterList(const TemplateParameterList *TemplateParams, 409 RecordData &Record); 410 411 /// \brief Emit a template argument list. 412 void AddTemplateArgumentList(const TemplateArgumentList *TemplateArgs, 413 RecordData &Record); 414 415 /// \brief Emit a UnresolvedSet structure. 416 void AddUnresolvedSet(const UnresolvedSetImpl &Set, RecordData &Record); 417 418 /// \brief Emit a C++ base specifier. 419 void AddCXXBaseSpecifier(const CXXBaseSpecifier &Base, RecordData &Record); 420 421 /// \brief Emit a CXXBaseOrMemberInitializer array. 422 void AddCXXBaseOrMemberInitializers( 423 const CXXBaseOrMemberInitializer * const *BaseOrMembers, 424 unsigned NumBaseOrMembers, RecordData &Record); 425 426 /// \brief Add a string to the given record. 427 void AddString(llvm::StringRef Str, RecordData &Record); 428 429 /// \brief Mark a namespace as needing an update. 430 void AddUpdatedNamespace(const NamespaceDecl *NS) { 431 UpdatedNamespaces.insert(NS); 432 } 433 434 /// \brief Record a template specialization or partial specialization of 435 /// a template from a previous PCH file. 
436 void AddAdditionalTemplateSpecialization(serialization::DeclID Templ, 437 serialization::DeclID Spec) { 438 AdditionalTemplateSpecializations[Templ].push_back(Spec); 439 } 440 441 /// \brief Note that the identifier II occurs at the given offset 442 /// within the identifier table. 443 void SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset); 444 445 /// \brief Note that the selector Sel occurs at the given offset 446 /// within the method pool/selector table. 447 void SetSelectorOffset(Selector Sel, uint32_t Offset); 448 449 /// \brief Add the given statement or expression to the queue of 450 /// statements to emit. 451 /// 452 /// This routine should be used when emitting types and declarations 453 /// that have expressions as part of their formulation. Once the 454 /// type or declaration has been written, call FlushStmts() to write 455 /// the corresponding statements just after the type or 456 /// declaration. 457 void AddStmt(Stmt *S) { 458 CollectedStmts->push_back(S); 459 } 460 461 /// \brief Flush all of the statements and expressions that have 462 /// been added to the queue via AddStmt(). 463 void FlushStmts(); 464 465 /// \brief Record an ID for the given switch-case statement. 466 unsigned RecordSwitchCaseID(SwitchCase *S); 467 468 /// \brief Retrieve the ID for the given switch-case statement. 469 unsigned getSwitchCaseID(SwitchCase *S); 470 471 /// \brief Retrieve the ID for the given label statement, which may 472 /// or may not have been emitted yet. 
473 unsigned GetLabelID(LabelStmt *S); 474 475 unsigned getParmVarDeclAbbrev() const { return ParmVarDeclAbbrev; } 476 477 bool hasChain() const { return Chain; } 478 479 // ASTDeserializationListener implementation 480 void SetReader(ASTReader *Reader); 481 void IdentifierRead(serialization::IdentID ID, IdentifierInfo *II); 482 void TypeRead(serialization::TypeIdx Idx, QualType T); 483 void DeclRead(serialization::DeclID ID, const Decl *D); 484 void SelectorRead(serialization::SelectorID iD, Selector Sel); 485}; 486 487/// \brief AST and semantic-analysis consumer that generates a 488/// precompiled header from the parsed source code. 489class PCHGenerator : public SemaConsumer { 490 const Preprocessor &PP; 491 const char *isysroot; 492 llvm::raw_ostream *Out; 493 Sema *SemaPtr; 494 MemorizeStatCalls *StatCalls; // owned by the FileManager 495 std::vector<unsigned char> Buffer; 496 llvm::BitstreamWriter Stream; 497 ASTWriter Writer; 498 499protected: 500 ASTWriter &getWriter() { return Writer; } 501 const ASTWriter &getWriter() const { return Writer; } 502 503public: 504 PCHGenerator(const Preprocessor &PP, bool Chaining, 505 const char *isysroot, llvm::raw_ostream *Out); 506 virtual void InitializeSema(Sema &S) { SemaPtr = &S; } 507 virtual void HandleTranslationUnit(ASTContext &Ctx); 508 virtual ASTDeserializationListener *GetASTDeserializationListener(); 509}; 510 511} // end namespace clang 512 513#endif 514