1//===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This header defines interfaces to read LLVM bitcode files/streams.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_BITCODE_BITCODEREADER_H
15#define LLVM_BITCODE_BITCODEREADER_H
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/Bitcode/BitCodes.h"
20#include "llvm/IR/ModuleSummaryIndex.h"
21#include "llvm/Support/Endian.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/ErrorOr.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include <cstdint>
26#include <memory>
27#include <string>
28#include <system_error>
29#include <vector>
30namespace llvm {
31
32class LLVMContext;
33class Module;
34
35  // These functions are for converting Expected/Error values to
36  // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
37  // Remove these functions once no longer needed by the C and libLTO APIs.
38
39  std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
40
41  template <typename T>
42  ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
43    if (!Val)
44      return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
45    return std::move(*Val);
46  }
47
// Forward declaration; the full definition follows BitcodeModule.
struct BitcodeFileContents;

/// Basic information extracted from a bitcode module to be used for LTO.
struct BitcodeLTOInfo {
  /// Whether the module is to be compiled with ThinLTO.
  bool IsThinLTO;

  /// Whether the module has a summary.
  bool HasSummary;
};
55
56  /// Represents a module in a bitcode file.
57  class BitcodeModule {
58    // This covers the identification (if present) and module blocks.
59    ArrayRef<uint8_t> Buffer;
60    StringRef ModuleIdentifier;
61
62    // The string table used to interpret this module.
63    StringRef Strtab;
64
65    // The bitstream location of the IDENTIFICATION_BLOCK.
66    uint64_t IdentificationBit;
67
68    // The bitstream location of this module's MODULE_BLOCK.
69    uint64_t ModuleBit;
70
71    BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
72                  uint64_t IdentificationBit, uint64_t ModuleBit)
73        : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
74          IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
75
76    // Calls the ctor.
77    friend Expected<BitcodeFileContents>
78    getBitcodeFileContents(MemoryBufferRef Buffer);
79
80    Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context,
81                                                    bool MaterializeAll,
82                                                    bool ShouldLazyLoadMetadata,
83                                                    bool IsImporting);
84
85  public:
86    StringRef getBuffer() const {
87      return StringRef((const char *)Buffer.begin(), Buffer.size());
88    }
89
90    StringRef getStrtab() const { return Strtab; }
91
92    StringRef getModuleIdentifier() const { return ModuleIdentifier; }
93
94    /// Read the bitcode module and prepare for lazy deserialization of function
95    /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
96    /// If IsImporting is true, this module is being parsed for ThinLTO
97    /// importing into another module.
98    Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
99                                                    bool ShouldLazyLoadMetadata,
100                                                    bool IsImporting);
101
102    /// Read the entire bitcode module and return it.
103    Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);
104
105    /// Returns information about the module to be used for LTO: whether to
106    /// compile with ThinLTO, and whether it has a summary.
107    Expected<BitcodeLTOInfo> getLTOInfo();
108
109    /// Parse the specified bitcode buffer, returning the module summary index.
110    Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
111
112    /// Parse the specified bitcode buffer and merge its module summary index
113    /// into CombinedIndex.
114    Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
115                      uint64_t ModuleId);
116  };
117
118  struct BitcodeFileContents {
119    std::vector<BitcodeModule> Mods;
120    StringRef Symtab, StrtabForSymtab;
121  };
122
123  /// Returns the contents of a bitcode file. This includes the raw contents of
124  /// the symbol table embedded in the bitcode file. Clients which require a
125  /// symbol table should prefer to use irsymtab::read instead of this function
126  /// because it creates a reader for the irsymtab and handles upgrading bitcode
127  /// files without a symbol table or with an old symbol table.
128  Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
129
130  /// Returns a list of modules in the specified bitcode buffer.
131  Expected<std::vector<BitcodeModule>>
132  getBitcodeModuleList(MemoryBufferRef Buffer);
133
134  /// Read the header of the specified bitcode buffer and prepare for lazy
135  /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
136  /// lazily load metadata as well. If IsImporting is true, this module is
137  /// being parsed for ThinLTO importing into another module.
138  Expected<std::unique_ptr<Module>>
139  getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
140                       bool ShouldLazyLoadMetadata = false,
141                       bool IsImporting = false);
142
143  /// Like getLazyBitcodeModule, except that the module takes ownership of
144  /// the memory buffer if successful. If successful, this moves Buffer. On
145  /// error, this *does not* move Buffer. If IsImporting is true, this module is
146  /// being parsed for ThinLTO importing into another module.
147  Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
148      std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
149      bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
150
151  /// Read the header of the specified bitcode buffer and extract just the
152  /// triple information. If successful, this returns a string. On error, this
153  /// returns "".
154  Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
155
156  /// Return true if \p Buffer contains a bitcode file with ObjC code (category
157  /// or class) in it.
158  Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
159
160  /// Read the header of the specified bitcode buffer and extract just the
161  /// producer string information. If successful, this returns a string. On
162  /// error, this returns "".
163  Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
164
165  /// Read the specified bitcode file, returning the module.
166  Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer,
167                                                     LLVMContext &Context);
168
169  /// Returns LTO information for the specified bitcode file.
170  Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
171
172  /// Parse the specified bitcode buffer, returning the module summary index.
173  Expected<std::unique_ptr<ModuleSummaryIndex>>
174  getModuleSummaryIndex(MemoryBufferRef Buffer);
175
176  /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
177  Error readModuleSummaryIndex(MemoryBufferRef Buffer,
178                               ModuleSummaryIndex &CombinedIndex,
179                               uint64_t ModuleId);
180
181  /// Parse the module summary index out of an IR file and return the module
182  /// summary index object if found, or an empty summary if not. If Path refers
183  /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
184  /// this function will return nullptr.
185  Expected<std::unique_ptr<ModuleSummaryIndex>>
186  getModuleSummaryIndexForFile(StringRef Path,
187                               bool IgnoreEmptyThinLTOIndexFile = false);
188
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
/// \param BufPtr first byte of the candidate range.
/// \param BufEnd one past the last byte of the candidate range.
/// \returns true only when [BufPtr, BufEnd) holds at least four bytes and the
/// first four are DE C0 17 0B; shorter ranges yield false.
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                             const unsigned char *BufEnd) {
  // All four magic bytes are inspected below, so four readable bytes are
  // required. Checking only for a non-empty range would allow reads past
  // BufEnd on 1-3 byte inputs whose prefix happens to match.
  if (BufEnd - BufPtr < 4)
    return false;

  // See if you can find the hidden message in the magic bytes :-).
  // (Hint: it's a little-endian encoding.)
  return BufPtr[0] == 0xDE &&
         BufPtr[1] == 0xC0 &&
         BufPtr[2] == 0x17 &&
         BufPtr[3] == 0x0B;
}
201
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
/// \param BufPtr first byte of the candidate range.
/// \param BufEnd one past the last byte of the candidate range.
/// \returns true only when [BufPtr, BufEnd) holds at least four bytes and the
/// first four are 'B' 'C' C0 DE; shorter ranges yield false.
inline bool isRawBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
  // All four magic bytes are inspected below, so four readable bytes are
  // required. Checking only for a non-empty range would allow reads past
  // BufEnd on 1-3 byte inputs whose prefix happens to match.
  if (BufEnd - BufPtr < 4)
    return false;

  // These bytes sort of have a hidden message, but it's not in
  // little-endian this time, and it's a little redundant.
  return BufPtr[0] == 'B' &&
         BufPtr[1] == 'C' &&
         BufPtr[2] == 0xc0 &&
         BufPtr[3] == 0xde;
}
214
215  /// isBitcode - Return true if the given bytes are the magic bytes for
216  /// LLVM IR bitcode, either with or without a wrapper.
217  inline bool isBitcode(const unsigned char *BufPtr,
218                        const unsigned char *BufEnd) {
219    return isBitcodeWrapper(BufPtr, BufEnd) ||
220           isRawBitcode(BufPtr, BufEnd);
221  }
222
223  /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
224  /// header for padding or other reasons.  The format of this header is:
225  ///
226  /// struct bc_header {
227  ///   uint32_t Magic;         // 0x0B17C0DE
228  ///   uint32_t Version;       // Version, currently always 0.
229  ///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
230  ///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
231  ///   ... potentially other gunk ...
232  /// };
233  ///
234  /// This function is called when we find a file with a matching magic number.
235  /// In this case, skip down to the subsection of the file that is actually a
236  /// BC file.
237  /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
238  /// contain the whole bitcode file.
239  inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
240                                       const unsigned char *&BufEnd,
241                                       bool VerifyBufferSize) {
242    // Must contain the offset and size field!
243    if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
244      return true;
245
246    unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
247    unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
248    uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
249
250    // Verify that Offset+Size fits in the file.
251    if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
252      return true;
253    BufPtr += Offset;
254    BufEnd = BufPtr+Size;
255    return false;
256  }
257
/// Returns the error category that BitcodeError codes belong to.
const std::error_category &BitcodeErrorCategory();

/// Error conditions reported by the bitcode reader. Values start at 1.
enum class BitcodeError { CorruptedBitcode = 1 };

/// Builds a std::error_code from \p E, using the enumerator's integer value
/// and BitcodeErrorCategory() as the category.
inline std::error_code make_error_code(BitcodeError E) {
  const int RawValue = static_cast<int>(E);
  return {RawValue, BitcodeErrorCategory()};
}
263
264} // end namespace llvm
265
266namespace std {
267
268template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
269
270} // end namespace std
271
272#endif // LLVM_BITCODE_BITCODEREADER_H
273