1//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This header defines the BitstreamReader class.  This class can be used to
11// read an arbitrary bitstream, regardless of its contents.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_BITCODE_BITSTREAMREADER_H
16#define LLVM_BITCODE_BITSTREAMREADER_H
17
18#include "llvm/Bitcode/BitCodes.h"
19#include "llvm/Support/Endian.h"
20#include "llvm/Support/StreamableMemoryObject.h"
21#include <climits>
22#include <string>
23#include <vector>
24
25namespace llvm {
26
27  class Deserializer;
28
29/// BitstreamReader - This class is used to read from an LLVM bitcode stream,
30/// maintaining information that is global to decoding the entire file.  While
31/// a file is being read, multiple cursors can be independently advanced or
32/// skipped around within the file.  These are represented by the
33/// BitstreamCursor class.
34class BitstreamReader {
35public:
36  /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
37  /// These describe abbreviations that all blocks of the specified ID inherit.
38  struct BlockInfo {
39    unsigned BlockID;
40    std::vector<BitCodeAbbrev*> Abbrevs;
41    std::string Name;
42
43    std::vector<std::pair<unsigned, std::string> > RecordNames;
44  };
45private:
46  std::unique_ptr<StreamableMemoryObject> BitcodeBytes;
47
48  std::vector<BlockInfo> BlockInfoRecords;
49
50  /// IgnoreBlockInfoNames - This is set to true if we don't care about the
51  /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
52  /// uses this.
53  bool IgnoreBlockInfoNames;
54
55  BitstreamReader(const BitstreamReader&) LLVM_DELETED_FUNCTION;
56  void operator=(const BitstreamReader&) LLVM_DELETED_FUNCTION;
57public:
58  BitstreamReader() : IgnoreBlockInfoNames(true) {
59  }
60
61  BitstreamReader(const unsigned char *Start, const unsigned char *End) {
62    IgnoreBlockInfoNames = true;
63    init(Start, End);
64  }
65
66  BitstreamReader(StreamableMemoryObject *bytes) {
67    BitcodeBytes.reset(bytes);
68  }
69
70  void init(const unsigned char *Start, const unsigned char *End) {
71    assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
72    BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
73  }
74
75  StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
76
77  ~BitstreamReader() {
78    // Free the BlockInfoRecords.
79    while (!BlockInfoRecords.empty()) {
80      BlockInfo &Info = BlockInfoRecords.back();
81      // Free blockinfo abbrev info.
82      for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
83           i != e; ++i)
84        Info.Abbrevs[i]->dropRef();
85      BlockInfoRecords.pop_back();
86    }
87  }
88
89  /// CollectBlockInfoNames - This is called by clients that want block/record
90  /// name information.
91  void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
92  bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
93
94  //===--------------------------------------------------------------------===//
95  // Block Manipulation
96  //===--------------------------------------------------------------------===//
97
98  /// hasBlockInfoRecords - Return true if we've already read and processed the
99  /// block info block for this Bitstream.  We only process it for the first
100  /// cursor that walks over it.
101  bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
102
103  /// getBlockInfo - If there is block info for the specified ID, return it,
104  /// otherwise return null.
105  const BlockInfo *getBlockInfo(unsigned BlockID) const {
106    // Common case, the most recent entry matches BlockID.
107    if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
108      return &BlockInfoRecords.back();
109
110    for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
111         i != e; ++i)
112      if (BlockInfoRecords[i].BlockID == BlockID)
113        return &BlockInfoRecords[i];
114    return nullptr;
115  }
116
117  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
118    if (const BlockInfo *BI = getBlockInfo(BlockID))
119      return *const_cast<BlockInfo*>(BI);
120
121    // Otherwise, add a new record.
122    BlockInfoRecords.push_back(BlockInfo());
123    BlockInfoRecords.back().BlockID = BlockID;
124    return BlockInfoRecords.back();
125  }
126};
127
128
129/// BitstreamEntry - When advancing through a bitstream cursor, each advance can
130/// discover a few different kinds of entries:
131///   Error    - Malformed bitcode was found.
132///   EndBlock - We've reached the end of the current block, (or the end of the
133///              file, which is treated like a series of EndBlock records.
134///   SubBlock - This is the start of a new subblock of a specific ID.
135///   Record   - This is a record with a specific AbbrevID.
136///
137struct BitstreamEntry {
138  enum {
139    Error,
140    EndBlock,
141    SubBlock,
142    Record
143  } Kind;
144
145  unsigned ID;
146
147  static BitstreamEntry getError() {
148    BitstreamEntry E; E.Kind = Error; return E;
149  }
150  static BitstreamEntry getEndBlock() {
151    BitstreamEntry E; E.Kind = EndBlock; return E;
152  }
153  static BitstreamEntry getSubBlock(unsigned ID) {
154    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
155  }
156  static BitstreamEntry getRecord(unsigned AbbrevID) {
157    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
158  }
159};
160
161/// BitstreamCursor - This represents a position within a bitcode file.  There
162/// may be multiple independent cursors reading within one bitstream, each
163/// maintaining their own local state.
164///
165/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
166/// be passed by value.
167class BitstreamCursor {
168  friend class Deserializer;
169  BitstreamReader *BitStream;
170  size_t NextChar;
171
172
173  /// CurWord/word_t - This is the current data we have pulled from the stream
174  /// but have not returned to the client.  This is specifically and
175  /// intentionally defined to follow the word size of the host machine for
176  /// efficiency.  We use word_t in places that are aware of this to make it
177  /// perfectly explicit what is going on.
178  typedef uint32_t word_t;
179  word_t CurWord;
180
181  /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
182  /// is always from [0...31/63] inclusive (depending on word size).
183  unsigned BitsInCurWord;
184
185  // CurCodeSize - This is the declared size of code values used for the current
186  // block, in bits.
187  unsigned CurCodeSize;
188
189  /// CurAbbrevs - Abbrevs installed at in this block.
190  std::vector<BitCodeAbbrev*> CurAbbrevs;
191
192  struct Block {
193    unsigned PrevCodeSize;
194    std::vector<BitCodeAbbrev*> PrevAbbrevs;
195    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
196  };
197
198  /// BlockScope - This tracks the codesize of parent blocks.
199  SmallVector<Block, 8> BlockScope;
200
201
202public:
203  BitstreamCursor() : BitStream(nullptr), NextChar(0) {}
204  BitstreamCursor(const BitstreamCursor &RHS)
205      : BitStream(nullptr), NextChar(0) {
206    operator=(RHS);
207  }
208
209  explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
210    NextChar = 0;
211    CurWord = 0;
212    BitsInCurWord = 0;
213    CurCodeSize = 2;
214  }
215
216  void init(BitstreamReader &R) {
217    freeState();
218
219    BitStream = &R;
220    NextChar = 0;
221    CurWord = 0;
222    BitsInCurWord = 0;
223    CurCodeSize = 2;
224  }
225
226  ~BitstreamCursor() {
227    freeState();
228  }
229
230  void operator=(const BitstreamCursor &RHS);
231
232  void freeState();
233
234  bool isEndPos(size_t pos) {
235    return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
236  }
237
238  bool canSkipToPos(size_t pos) const {
239    // pos can be skipped to if it is a valid address or one byte past the end.
240    return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
241        static_cast<uint64_t>(pos - 1));
242  }
243
244  uint32_t getWord(size_t pos) {
245    uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
246    BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf);
247    return *reinterpret_cast<support::ulittle32_t *>(buf);
248  }
249
250  bool AtEndOfStream() {
251    return BitsInCurWord == 0 && isEndPos(NextChar);
252  }
253
254  /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
255  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
256
257  /// GetCurrentBitNo - Return the bit # of the bit we are reading.
258  uint64_t GetCurrentBitNo() const {
259    return NextChar*CHAR_BIT - BitsInCurWord;
260  }
261
262  BitstreamReader *getBitStreamReader() {
263    return BitStream;
264  }
265  const BitstreamReader *getBitStreamReader() const {
266    return BitStream;
267  }
268
269  /// Flags that modify the behavior of advance().
270  enum {
271    /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does
272    /// not automatically pop the block scope when the end of a block is
273    /// reached.
274    AF_DontPopBlockAtEnd = 1,
275
276    /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are
277    /// returned just like normal records.
278    AF_DontAutoprocessAbbrevs = 2
279  };
280
281  /// advance - Advance the current bitstream, returning the next entry in the
282  /// stream.
283  BitstreamEntry advance(unsigned Flags = 0) {
284    while (1) {
285      unsigned Code = ReadCode();
286      if (Code == bitc::END_BLOCK) {
287        // Pop the end of the block unless Flags tells us not to.
288        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
289          return BitstreamEntry::getError();
290        return BitstreamEntry::getEndBlock();
291      }
292
293      if (Code == bitc::ENTER_SUBBLOCK)
294        return BitstreamEntry::getSubBlock(ReadSubBlockID());
295
296      if (Code == bitc::DEFINE_ABBREV &&
297          !(Flags & AF_DontAutoprocessAbbrevs)) {
298        // We read and accumulate abbrev's, the client can't do anything with
299        // them anyway.
300        ReadAbbrevRecord();
301        continue;
302      }
303
304      return BitstreamEntry::getRecord(Code);
305    }
306  }
307
308  /// advanceSkippingSubblocks - This is a convenience function for clients that
309  /// don't expect any subblocks.  This just skips over them automatically.
310  BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
311    while (1) {
312      // If we found a normal entry, return it.
313      BitstreamEntry Entry = advance(Flags);
314      if (Entry.Kind != BitstreamEntry::SubBlock)
315        return Entry;
316
317      // If we found a sub-block, just skip over it and check the next entry.
318      if (SkipBlock())
319        return BitstreamEntry::getError();
320    }
321  }
322
323  /// JumpToBit - Reset the stream to the specified bit number.
324  void JumpToBit(uint64_t BitNo) {
325    uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1);
326    unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
327    assert(canSkipToPos(ByteNo) && "Invalid location");
328
329    // Move the cursor to the right word.
330    NextChar = ByteNo;
331    BitsInCurWord = 0;
332    CurWord = 0;
333
334    // Skip over any bits that are already consumed.
335    if (WordBitNo) {
336      if (sizeof(word_t) > 4)
337        Read64(WordBitNo);
338      else
339        Read(WordBitNo);
340    }
341  }
342
343
344  uint32_t Read(unsigned NumBits) {
345    assert(NumBits && NumBits <= 32 &&
346           "Cannot return zero or more than 32 bits!");
347
348    // If the field is fully contained by CurWord, return it quickly.
349    if (BitsInCurWord >= NumBits) {
350      uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits));
351      CurWord >>= NumBits;
352      BitsInCurWord -= NumBits;
353      return R;
354    }
355
356    // If we run out of data, stop at the end of the stream.
357    if (isEndPos(NextChar)) {
358      CurWord = 0;
359      BitsInCurWord = 0;
360      return 0;
361    }
362
363    uint32_t R = uint32_t(CurWord);
364
365    // Read the next word from the stream.
366    uint8_t Array[sizeof(word_t)] = {0};
367
368    BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), Array);
369
370    // Handle big-endian byte-swapping if necessary.
371    support::detail::packed_endian_specific_integral
372      <word_t, support::little, support::unaligned> EndianValue;
373    memcpy(&EndianValue, Array, sizeof(Array));
374
375    CurWord = EndianValue;
376
377    NextChar += sizeof(word_t);
378
379    // Extract NumBits-BitsInCurWord from what we just read.
380    unsigned BitsLeft = NumBits-BitsInCurWord;
381
382    // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive.
383    R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft)))
384                    << BitsInCurWord);
385
386    // BitsLeft bits have just been used up from CurWord.  BitsLeft is in the
387    // range [1..32]/[1..64] so be careful how we shift.
388    if (BitsLeft != sizeof(word_t)*8)
389      CurWord >>= BitsLeft;
390    else
391      CurWord = 0;
392    BitsInCurWord = sizeof(word_t)*8-BitsLeft;
393    return R;
394  }
395
396  uint64_t Read64(unsigned NumBits) {
397    if (NumBits <= 32) return Read(NumBits);
398
399    uint64_t V = Read(32);
400    return V | (uint64_t)Read(NumBits-32) << 32;
401  }
402
403  uint32_t ReadVBR(unsigned NumBits) {
404    uint32_t Piece = Read(NumBits);
405    if ((Piece & (1U << (NumBits-1))) == 0)
406      return Piece;
407
408    uint32_t Result = 0;
409    unsigned NextBit = 0;
410    while (1) {
411      Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
412
413      if ((Piece & (1U << (NumBits-1))) == 0)
414        return Result;
415
416      NextBit += NumBits-1;
417      Piece = Read(NumBits);
418    }
419  }
420
421  // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size.  The
422  // chunk size of the VBR must still be <= 32 bits though.
423  uint64_t ReadVBR64(unsigned NumBits) {
424    uint32_t Piece = Read(NumBits);
425    if ((Piece & (1U << (NumBits-1))) == 0)
426      return uint64_t(Piece);
427
428    uint64_t Result = 0;
429    unsigned NextBit = 0;
430    while (1) {
431      Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
432
433      if ((Piece & (1U << (NumBits-1))) == 0)
434        return Result;
435
436      NextBit += NumBits-1;
437      Piece = Read(NumBits);
438    }
439  }
440
441private:
442  void SkipToFourByteBoundary() {
443    // If word_t is 64-bits and if we've read less than 32 bits, just dump
444    // the bits we have up to the next 32-bit boundary.
445    if (sizeof(word_t) > 4 &&
446        BitsInCurWord >= 32) {
447      CurWord >>= BitsInCurWord-32;
448      BitsInCurWord = 32;
449      return;
450    }
451
452    BitsInCurWord = 0;
453    CurWord = 0;
454  }
455public:
456
457  unsigned ReadCode() {
458    return Read(CurCodeSize);
459  }
460
461
462  // Block header:
463  //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
464
465  /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
466  /// the block.
467  unsigned ReadSubBlockID() {
468    return ReadVBR(bitc::BlockIDWidth);
469  }
470
471  /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
472  /// over the body of this block.  If the block record is malformed, return
473  /// true.
474  bool SkipBlock() {
475    // Read and ignore the codelen value.  Since we are skipping this block, we
476    // don't care what code widths are used inside of it.
477    ReadVBR(bitc::CodeLenWidth);
478    SkipToFourByteBoundary();
479    unsigned NumFourBytes = Read(bitc::BlockSizeWidth);
480
481    // Check that the block wasn't partially defined, and that the offset isn't
482    // bogus.
483    size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
484    if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
485      return true;
486
487    JumpToBit(SkipTo);
488    return false;
489  }
490
491  /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
492  /// the block, and return true if the block has an error.
493  bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
494
495  bool ReadBlockEnd() {
496    if (BlockScope.empty()) return true;
497
498    // Block tail:
499    //    [END_BLOCK, <align4bytes>]
500    SkipToFourByteBoundary();
501
502    popBlockScope();
503    return false;
504  }
505
506private:
507
508  void popBlockScope() {
509    CurCodeSize = BlockScope.back().PrevCodeSize;
510
511    // Delete abbrevs from popped scope.
512    for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
513         i != e; ++i)
514      CurAbbrevs[i]->dropRef();
515
516    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
517    BlockScope.pop_back();
518  }
519
520  //===--------------------------------------------------------------------===//
521  // Record Processing
522  //===--------------------------------------------------------------------===//
523
524private:
525  void readAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
526                              SmallVectorImpl<uint64_t> &Vals);
527  void readAbbreviatedField(const BitCodeAbbrevOp &Op,
528                            SmallVectorImpl<uint64_t> &Vals);
529  void skipAbbreviatedField(const BitCodeAbbrevOp &Op);
530
531public:
532
533  /// getAbbrev - Return the abbreviation for the specified AbbrevId.
534  const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
535    unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
536    assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
537    return CurAbbrevs[AbbrevNo];
538  }
539
540  /// skipRecord - Read the current record and discard it.
541  void skipRecord(unsigned AbbrevID);
542
543  unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
544                      StringRef *Blob = nullptr);
545
546  //===--------------------------------------------------------------------===//
547  // Abbrev Processing
548  //===--------------------------------------------------------------------===//
549  void ReadAbbrevRecord();
550
551  bool ReadBlockInfoBlock();
552};
553
554} // End llvm namespace
555
556#endif
557