1//===-- llvm/Bitcode/BitcodeReader.h - Bitcode reader ----*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This header defines interfaces to read LLVM bitcode files/streams.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_BITCODE_BITCODEREADER_H
15#define LLVM_BITCODE_BITCODEREADER_H
16
17#include "llvm/Bitcode/BitCodes.h"
18#include "llvm/IR/DiagnosticInfo.h"
19#include "llvm/IR/ModuleSummaryIndex.h"
20#include "llvm/Support/Endian.h"
21#include "llvm/Support/Error.h"
22#include "llvm/Support/ErrorOr.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include <memory>
25
26namespace llvm {
27  class LLVMContext;
28  class Module;
29
30  // These functions are for converting Expected/Error values to
31  // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
32  // Remove these functions once no longer needed by the C and libLTO APIs.
33
34  std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
35
36  template <typename T>
37  ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
38    if (!Val)
39      return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
40    return std::move(*Val);
41  }
42
43  struct BitcodeFileContents;
44
45  /// Basic information extracted from a bitcode module to be used for LTO.
46  struct BitcodeLTOInfo {
47    bool IsThinLTO;
48    bool HasSummary;
49  };
50
51  /// Represents a module in a bitcode file.
52  class BitcodeModule {
53    // This covers the identification (if present) and module blocks.
54    ArrayRef<uint8_t> Buffer;
55    StringRef ModuleIdentifier;
56
57    // The string table used to interpret this module.
58    StringRef Strtab;
59
60    // The bitstream location of the IDENTIFICATION_BLOCK.
61    uint64_t IdentificationBit;
62
63    // The bitstream location of this module's MODULE_BLOCK.
64    uint64_t ModuleBit;
65
66    BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
67                  uint64_t IdentificationBit, uint64_t ModuleBit)
68        : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
69          IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
70
71    // Calls the ctor.
72    friend Expected<BitcodeFileContents>
73    getBitcodeFileContents(MemoryBufferRef Buffer);
74
75    Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context,
76                                                    bool MaterializeAll,
77                                                    bool ShouldLazyLoadMetadata,
78                                                    bool IsImporting);
79
80  public:
81    StringRef getBuffer() const {
82      return StringRef((const char *)Buffer.begin(), Buffer.size());
83    }
84    StringRef getStrtab() const { return Strtab; }
85
86    StringRef getModuleIdentifier() const { return ModuleIdentifier; }
87
88    /// Read the bitcode module and prepare for lazy deserialization of function
89    /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
90    /// If IsImporting is true, this module is being parsed for ThinLTO
91    /// importing into another module.
92    Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
93                                                    bool ShouldLazyLoadMetadata,
94                                                    bool IsImporting);
95
96    /// Read the entire bitcode module and return it.
97    Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);
98
99    /// Returns information about the module to be used for LTO: whether to
100    /// compile with ThinLTO, and whether it has a summary.
101    Expected<BitcodeLTOInfo> getLTOInfo();
102
103    /// Parse the specified bitcode buffer, returning the module summary index.
104    Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
105
106    /// Parse the specified bitcode buffer and merge its module summary index
107    /// into CombinedIndex.
108    Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
109                      uint64_t ModuleId);
110  };
111
112  struct BitcodeFileContents {
113    std::vector<BitcodeModule> Mods;
114  };
115
116  /// Returns the contents of a bitcode file.
117  Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
118
119  /// Returns a list of modules in the specified bitcode buffer.
120  Expected<std::vector<BitcodeModule>>
121  getBitcodeModuleList(MemoryBufferRef Buffer);
122
123  /// Read the header of the specified bitcode buffer and prepare for lazy
124  /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
125  /// lazily load metadata as well. If IsImporting is true, this module is
126  /// being parsed for ThinLTO importing into another module.
127  Expected<std::unique_ptr<Module>>
128  getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
129                       bool ShouldLazyLoadMetadata = false,
130                       bool IsImporting = false);
131
132  /// Like getLazyBitcodeModule, except that the module takes ownership of
133  /// the memory buffer if successful. If successful, this moves Buffer. On
134  /// error, this *does not* move Buffer. If IsImporting is true, this module is
135  /// being parsed for ThinLTO importing into another module.
136  Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
137      std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
138      bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
139
  /// Read the header of the specified bitcode buffer and extract just the
  /// triple information. If successful, this returns the triple string; on
  /// failure, the error is carried by the returned Expected.
143  Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
144
145  /// Return true if \p Buffer contains a bitcode file with ObjC code (category
146  /// or class) in it.
147  Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
148
  /// Read the header of the specified bitcode buffer and extract just the
  /// producer string information. If successful, this returns the producer
  /// string; on failure, the error is carried by the returned Expected.
152  Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
153
154  /// Read the specified bitcode file, returning the module.
155  Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer,
156                                                     LLVMContext &Context);
157
158  /// Returns LTO information for the specified bitcode file.
159  Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
160
161  /// Parse the specified bitcode buffer, returning the module summary index.
162  Expected<std::unique_ptr<ModuleSummaryIndex>>
163  getModuleSummaryIndex(MemoryBufferRef Buffer);
164
165  /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
166  Error readModuleSummaryIndex(MemoryBufferRef Buffer,
167                               ModuleSummaryIndex &CombinedIndex,
168                               uint64_t ModuleId);
169
170  /// Parse the module summary index out of an IR file and return the module
171  /// summary index object if found, or an empty summary if not. If Path refers
172  /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
173  /// this function will return nullptr.
174  Expected<std::unique_ptr<ModuleSummaryIndex>>
175  getModuleSummaryIndexForFile(StringRef Path,
176                               bool IgnoreEmptyThinLTOIndexFile = false);
177
178  /// isBitcodeWrapper - Return true if the given bytes are the magic bytes
179  /// for an LLVM IR bitcode wrapper.
180  ///
181  inline bool isBitcodeWrapper(const unsigned char *BufPtr,
182                               const unsigned char *BufEnd) {
183    // See if you can find the hidden message in the magic bytes :-).
184    // (Hint: it's a little-endian encoding.)
185    return BufPtr != BufEnd &&
186           BufPtr[0] == 0xDE &&
187           BufPtr[1] == 0xC0 &&
188           BufPtr[2] == 0x17 &&
189           BufPtr[3] == 0x0B;
190  }
191
192  /// isRawBitcode - Return true if the given bytes are the magic bytes for
193  /// raw LLVM IR bitcode (without a wrapper).
194  ///
195  inline bool isRawBitcode(const unsigned char *BufPtr,
196                           const unsigned char *BufEnd) {
197    // These bytes sort of have a hidden message, but it's not in
198    // little-endian this time, and it's a little redundant.
199    return BufPtr != BufEnd &&
200           BufPtr[0] == 'B' &&
201           BufPtr[1] == 'C' &&
202           BufPtr[2] == 0xc0 &&
203           BufPtr[3] == 0xde;
204  }
205
206  /// isBitcode - Return true if the given bytes are the magic bytes for
207  /// LLVM IR bitcode, either with or without a wrapper.
208  ///
209  inline bool isBitcode(const unsigned char *BufPtr,
210                        const unsigned char *BufEnd) {
211    return isBitcodeWrapper(BufPtr, BufEnd) ||
212           isRawBitcode(BufPtr, BufEnd);
213  }
214
215  /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
216  /// header for padding or other reasons.  The format of this header is:
217  ///
218  /// struct bc_header {
219  ///   uint32_t Magic;         // 0x0B17C0DE
220  ///   uint32_t Version;       // Version, currently always 0.
221  ///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
222  ///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
223  ///   ... potentially other gunk ...
224  /// };
225  ///
226  /// This function is called when we find a file with a matching magic number.
227  /// In this case, skip down to the subsection of the file that is actually a
228  /// BC file.
229  /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
230  /// contain the whole bitcode file.
231  inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
232                                       const unsigned char *&BufEnd,
233                                       bool VerifyBufferSize) {
234    // Must contain the offset and size field!
235    if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
236      return true;
237
238    unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
239    unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
240    uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
241
242    // Verify that Offset+Size fits in the file.
243    if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
244      return true;
245    BufPtr += Offset;
246    BufEnd = BufPtr+Size;
247    return false;
248  }
249
250  const std::error_category &BitcodeErrorCategory();
251  enum class BitcodeError { CorruptedBitcode = 1 };
252  inline std::error_code make_error_code(BitcodeError E) {
253    return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
254  }
255
256} // End llvm namespace
257
258namespace std {
259template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
260}
261
262#endif
263