BitstreamReader.h revision f0891be8bdbeeadb39da5575273b6645755fa383
1//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This header defines the BitstreamReader class.  This class can be used to
11// read an arbitrary bitstream, regardless of its contents.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef BITSTREAM_READER_H
16#define BITSTREAM_READER_H
17
18#include "llvm/Bitcode/BitCodes.h"
19#include <climits>
20#include <string>
21#include <vector>
22
23namespace llvm {
24
25  class Deserializer;
26
27class BitstreamReader {
28public:
29  /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
30  /// These describe abbreviations that all blocks of the specified ID inherit.
31  struct BlockInfo {
32    unsigned BlockID;
33    std::vector<BitCodeAbbrev*> Abbrevs;
34    std::string Name;
35
36    std::vector<std::pair<unsigned, std::string> > RecordNames;
37  };
38private:
39  /// FirstChar/LastChar - This remembers the first and last bytes of the
40  /// stream.
41  const unsigned char *FirstChar, *LastChar;
42
43  std::vector<BlockInfo> BlockInfoRecords;
44
45  /// IgnoreBlockInfoNames - This is set to true if we don't care about the
46  /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
47  /// uses this.
48  bool IgnoreBlockInfoNames;
49
50  BitstreamReader(const BitstreamReader&);  // NOT IMPLEMENTED
51  void operator=(const BitstreamReader&);  // NOT IMPLEMENTED
52public:
53  BitstreamReader() : FirstChar(0), LastChar(0), IgnoreBlockInfoNames(true) {
54  }
55
56  BitstreamReader(const unsigned char *Start, const unsigned char *End) {
57    IgnoreBlockInfoNames = true;
58    init(Start, End);
59  }
60
61  void init(const unsigned char *Start, const unsigned char *End) {
62    FirstChar = Start;
63    LastChar = End;
64    assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
65  }
66
67  ~BitstreamReader() {
68    // Free the BlockInfoRecords.
69    while (!BlockInfoRecords.empty()) {
70      BlockInfo &Info = BlockInfoRecords.back();
71      // Free blockinfo abbrev info.
72      for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
73           i != e; ++i)
74        Info.Abbrevs[i]->dropRef();
75      BlockInfoRecords.pop_back();
76    }
77  }
78
79  const unsigned char *getFirstChar() const { return FirstChar; }
80  const unsigned char *getLastChar() const { return LastChar; }
81
82  /// CollectBlockInfoNames - This is called by clients that want block/record
83  /// name information.
84  void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
85  bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
86
87  //===--------------------------------------------------------------------===//
88  // Block Manipulation
89  //===--------------------------------------------------------------------===//
90
91  /// hasBlockInfoRecords - Return true if we've already read and processed the
92  /// block info block for this Bitstream.  We only process it for the first
93  /// cursor that walks over it.
94  bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
95
96  /// getBlockInfo - If there is block info for the specified ID, return it,
97  /// otherwise return null.
98  const BlockInfo *getBlockInfo(unsigned BlockID) const {
99    // Common case, the most recent entry matches BlockID.
100    if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
101      return &BlockInfoRecords.back();
102
103    for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
104         i != e; ++i)
105      if (BlockInfoRecords[i].BlockID == BlockID)
106        return &BlockInfoRecords[i];
107    return 0;
108  }
109
110  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
111    if (const BlockInfo *BI = getBlockInfo(BlockID))
112      return *const_cast<BlockInfo*>(BI);
113
114    // Otherwise, add a new record.
115    BlockInfoRecords.push_back(BlockInfo());
116    BlockInfoRecords.back().BlockID = BlockID;
117    return BlockInfoRecords.back();
118  }
119
120};
121
122class BitstreamCursor {
123  friend class Deserializer;
124  BitstreamReader *BitStream;
125  const unsigned char *NextChar;
126
127  /// CurWord - This is the current data we have pulled from the stream but have
128  /// not returned to the client.
129  uint32_t CurWord;
130
131  /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
132  /// is always from [0...31] inclusive.
133  unsigned BitsInCurWord;
134
135  // CurCodeSize - This is the declared size of code values used for the current
136  // block, in bits.
137  unsigned CurCodeSize;
138
139  /// CurAbbrevs - Abbrevs installed at in this block.
140  std::vector<BitCodeAbbrev*> CurAbbrevs;
141
142  struct Block {
143    unsigned PrevCodeSize;
144    std::vector<BitCodeAbbrev*> PrevAbbrevs;
145    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
146  };
147
148  /// BlockScope - This tracks the codesize of parent blocks.
149  SmallVector<Block, 8> BlockScope;
150
151public:
152  BitstreamCursor() : BitStream(0), NextChar(0) {
153  }
154  BitstreamCursor(const BitstreamCursor &RHS) : BitStream(0), NextChar(0) {
155    operator=(RHS);
156  }
157
158  explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
159    NextChar = R.getFirstChar();
160    assert(NextChar && "Bitstream not initialized yet");
161    CurWord = 0;
162    BitsInCurWord = 0;
163    CurCodeSize = 2;
164  }
165
166  void init(BitstreamReader &R) {
167    freeState();
168
169    BitStream = &R;
170    NextChar = R.getFirstChar();
171    assert(NextChar && "Bitstream not initialized yet");
172    CurWord = 0;
173    BitsInCurWord = 0;
174    CurCodeSize = 2;
175  }
176
177  ~BitstreamCursor() {
178    freeState();
179  }
180
181  void operator=(const BitstreamCursor &RHS) {
182    freeState();
183
184    BitStream = RHS.BitStream;
185    NextChar = RHS.NextChar;
186    CurWord = RHS.CurWord;
187    BitsInCurWord = RHS.BitsInCurWord;
188    CurCodeSize = RHS.CurCodeSize;
189
190    // Copy abbreviations, and bump ref counts.
191    CurAbbrevs = RHS.CurAbbrevs;
192    for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
193         i != e; ++i)
194      CurAbbrevs[i]->addRef();
195
196    // Copy block scope and bump ref counts.
197    for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
198         S != e; ++S) {
199      std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
200      for (unsigned i = 0, e = static_cast<unsigned>(Abbrevs.size());
201           i != e; ++i)
202        Abbrevs[i]->addRef();
203    }
204  }
205
206  void freeState() {
207    // Free all the Abbrevs.
208    for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
209         i != e; ++i)
210      CurAbbrevs[i]->dropRef();
211    CurAbbrevs.clear();
212
213    // Free all the Abbrevs in the block scope.
214    for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
215         S != e; ++S) {
216      std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
217      for (unsigned i = 0, e = static_cast<unsigned>(Abbrevs.size());
218           i != e; ++i)
219        Abbrevs[i]->dropRef();
220    }
221    BlockScope.clear();
222  }
223
224  /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
225  unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
226
227  bool AtEndOfStream() const {
228    return NextChar == BitStream->getLastChar() && BitsInCurWord == 0;
229  }
230
231  /// GetCurrentBitNo - Return the bit # of the bit we are reading.
232  uint64_t GetCurrentBitNo() const {
233    return (NextChar-BitStream->getFirstChar())*CHAR_BIT - BitsInCurWord;
234  }
235
236  BitstreamReader *getBitStreamReader() {
237    return BitStream;
238  }
239  const BitstreamReader *getBitStreamReader() const {
240    return BitStream;
241  }
242
243
244  /// JumpToBit - Reset the stream to the specified bit number.
245  void JumpToBit(uint64_t BitNo) {
246    uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3;
247    uintptr_t WordBitNo = uintptr_t(BitNo) & 31;
248    assert(ByteNo <= (uintptr_t)(BitStream->getLastChar()-
249                                 BitStream->getFirstChar()) &&
250           "Invalid location");
251
252    // Move the cursor to the right word.
253    NextChar = BitStream->getFirstChar()+ByteNo;
254    BitsInCurWord = 0;
255    CurWord = 0;
256
257    // Skip over any bits that are already consumed.
258    if (WordBitNo)
259      Read(static_cast<unsigned>(WordBitNo));
260  }
261
262
263  uint32_t Read(unsigned NumBits) {
264    assert(NumBits <= 32 && "Cannot return more than 32 bits!");
265    // If the field is fully contained by CurWord, return it quickly.
266    if (BitsInCurWord >= NumBits) {
267      uint32_t R = CurWord & ((1U << NumBits)-1);
268      CurWord >>= NumBits;
269      BitsInCurWord -= NumBits;
270      return R;
271    }
272
273    // If we run out of data, stop at the end of the stream.
274    if (NextChar == BitStream->getLastChar()) {
275      CurWord = 0;
276      BitsInCurWord = 0;
277      return 0;
278    }
279
280    unsigned R = CurWord;
281
282    // Read the next word from the stream.
283    CurWord = (NextChar[0] <<  0) | (NextChar[1] << 8) |
284              (NextChar[2] << 16) | (NextChar[3] << 24);
285    NextChar += 4;
286
287    // Extract NumBits-BitsInCurWord from what we just read.
288    unsigned BitsLeft = NumBits-BitsInCurWord;
289
290    // Be careful here, BitsLeft is in the range [1..32] inclusive.
291    R |= (CurWord & (~0U >> (32-BitsLeft))) << BitsInCurWord;
292
293    // BitsLeft bits have just been used up from CurWord.
294    if (BitsLeft != 32)
295      CurWord >>= BitsLeft;
296    else
297      CurWord = 0;
298    BitsInCurWord = 32-BitsLeft;
299    return R;
300  }
301
302  uint64_t Read64(unsigned NumBits) {
303    if (NumBits <= 32) return Read(NumBits);
304
305    uint64_t V = Read(32);
306    return V | (uint64_t)Read(NumBits-32) << 32;
307  }
308
309  uint32_t ReadVBR(unsigned NumBits) {
310    uint32_t Piece = Read(NumBits);
311    if ((Piece & (1U << (NumBits-1))) == 0)
312      return Piece;
313
314    uint32_t Result = 0;
315    unsigned NextBit = 0;
316    while (1) {
317      Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
318
319      if ((Piece & (1U << (NumBits-1))) == 0)
320        return Result;
321
322      NextBit += NumBits-1;
323      Piece = Read(NumBits);
324    }
325  }
326
327  // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size.  The
328  // chunk size of the VBR must still be <= 32 bits though.
329  uint64_t ReadVBR64(unsigned NumBits) {
330    uint32_t Piece = Read(NumBits);
331    if ((Piece & (1U << (NumBits-1))) == 0)
332      return uint64_t(Piece);
333
334    uint64_t Result = 0;
335    unsigned NextBit = 0;
336    while (1) {
337      Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
338
339      if ((Piece & (1U << (NumBits-1))) == 0)
340        return Result;
341
342      NextBit += NumBits-1;
343      Piece = Read(NumBits);
344    }
345  }
346
347  void SkipToWord() {
348    BitsInCurWord = 0;
349    CurWord = 0;
350  }
351
352  unsigned ReadCode() {
353    return Read(CurCodeSize);
354  }
355
356
357  // Block header:
358  //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
359
360  /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
361  /// the block.
362  unsigned ReadSubBlockID() {
363    return ReadVBR(bitc::BlockIDWidth);
364  }
365
366  /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
367  /// over the body of this block.  If the block record is malformed, return
368  /// true.
369  bool SkipBlock() {
370    // Read and ignore the codelen value.  Since we are skipping this block, we
371    // don't care what code widths are used inside of it.
372    ReadVBR(bitc::CodeLenWidth);
373    SkipToWord();
374    unsigned NumWords = Read(bitc::BlockSizeWidth);
375
376    // Check that the block wasn't partially defined, and that the offset isn't
377    // bogus.
378    if (AtEndOfStream() || NextChar+NumWords*4 > BitStream->getLastChar())
379      return true;
380
381    NextChar += NumWords*4;
382    return false;
383  }
384
385  /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
386  /// the block, and return true if the block is valid.
387  bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0) {
388    // Save the current block's state on BlockScope.
389    BlockScope.push_back(Block(CurCodeSize));
390    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
391
392    // Add the abbrevs specific to this block to the CurAbbrevs list.
393    if (const BitstreamReader::BlockInfo *Info =
394          BitStream->getBlockInfo(BlockID)) {
395      for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
396           i != e; ++i) {
397        CurAbbrevs.push_back(Info->Abbrevs[i]);
398        CurAbbrevs.back()->addRef();
399      }
400    }
401
402    // Get the codesize of this block.
403    CurCodeSize = ReadVBR(bitc::CodeLenWidth);
404    SkipToWord();
405    unsigned NumWords = Read(bitc::BlockSizeWidth);
406    if (NumWordsP) *NumWordsP = NumWords;
407
408    // Validate that this block is sane.
409    if (CurCodeSize == 0 || AtEndOfStream() ||
410        NextChar+NumWords*4 > BitStream->getLastChar())
411      return true;
412
413    return false;
414  }
415
416  bool ReadBlockEnd() {
417    if (BlockScope.empty()) return true;
418
419    // Block tail:
420    //    [END_BLOCK, <align4bytes>]
421    SkipToWord();
422
423    PopBlockScope();
424    return false;
425  }
426
427private:
428  void PopBlockScope() {
429    CurCodeSize = BlockScope.back().PrevCodeSize;
430
431    // Delete abbrevs from popped scope.
432    for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
433         i != e; ++i)
434      CurAbbrevs[i]->dropRef();
435
436    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
437    BlockScope.pop_back();
438  }
439
440 //===--------------------------------------------------------------------===//
441  // Record Processing
442  //===--------------------------------------------------------------------===//
443
444private:
445  void ReadAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
446                              SmallVectorImpl<uint64_t> &Vals) {
447    assert(Op.isLiteral() && "Not a literal");
448    // If the abbrev specifies the literal value to use, use it.
449    Vals.push_back(Op.getLiteralValue());
450  }
451
452  void ReadAbbreviatedField(const BitCodeAbbrevOp &Op,
453                            SmallVectorImpl<uint64_t> &Vals) {
454    assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
455
456    // Decode the value as we are commanded.
457    switch (Op.getEncoding()) {
458    default: assert(0 && "Unknown encoding!");
459    case BitCodeAbbrevOp::Fixed:
460      Vals.push_back(Read((unsigned)Op.getEncodingData()));
461      break;
462    case BitCodeAbbrevOp::VBR:
463      Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData()));
464      break;
465    case BitCodeAbbrevOp::Char6:
466      Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6)));
467      break;
468    }
469  }
470public:
471
472  /// getAbbrev - Return the abbreviation for the specified AbbrevId.
473  const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
474    unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
475    assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
476    return CurAbbrevs[AbbrevNo];
477  }
478
479  unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
480                      const char **BlobStart = 0, unsigned *BlobLen = 0) {
481    if (AbbrevID == bitc::UNABBREV_RECORD) {
482      unsigned Code = ReadVBR(6);
483      unsigned NumElts = ReadVBR(6);
484      for (unsigned i = 0; i != NumElts; ++i)
485        Vals.push_back(ReadVBR64(6));
486      return Code;
487    }
488
489    const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
490
491    for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
492      const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
493      if (Op.isLiteral()) {
494        ReadAbbreviatedLiteral(Op, Vals);
495      } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
496        // Array case.  Read the number of elements as a vbr6.
497        unsigned NumElts = ReadVBR(6);
498
499        // Get the element encoding.
500        assert(i+2 == e && "array op not second to last?");
501        const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
502
503        // Read all the elements.
504        for (; NumElts; --NumElts)
505          ReadAbbreviatedField(EltEnc, Vals);
506      } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
507        // Blob case.  Read the number of bytes as a vbr6.
508        unsigned NumElts = ReadVBR(6);
509        SkipToWord();  // 32-bit alignment
510
511        // Figure out where the end of this blob will be including tail padding.
512        const unsigned char *NewEnd = NextChar+((NumElts+3)&~3);
513
514        // If this would read off the end of the bitcode file, just set the
515        // record to empty and return.
516        if (NewEnd > BitStream->getLastChar()) {
517          Vals.append(NumElts, 0);
518          NextChar = BitStream->getLastChar();
519          break;
520        }
521
522        // Otherwise, read the number of bytes.  If we can return a reference to
523        // the data, do so to avoid copying it.
524        if (BlobStart) {
525          *BlobStart = (const char*)NextChar;
526          *BlobLen = NumElts;
527        } else {
528          for (; NumElts; ++NextChar, --NumElts)
529            Vals.push_back(*NextChar);
530        }
531        // Skip over tail padding.
532        NextChar = NewEnd;
533      } else {
534        ReadAbbreviatedField(Op, Vals);
535      }
536    }
537
538    unsigned Code = (unsigned)Vals[0];
539    Vals.erase(Vals.begin());
540    return Code;
541  }
542
543  unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
544                      const char *&BlobStart, unsigned &BlobLen) {
545    return ReadRecord(AbbrevID, Vals, &BlobStart, &BlobLen);
546  }
547
548
549  //===--------------------------------------------------------------------===//
550  // Abbrev Processing
551  //===--------------------------------------------------------------------===//
552
553  void ReadAbbrevRecord() {
554    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
555    unsigned NumOpInfo = ReadVBR(5);
556    for (unsigned i = 0; i != NumOpInfo; ++i) {
557      bool IsLiteral = Read(1) ? true : false;
558      if (IsLiteral) {
559        Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8)));
560        continue;
561      }
562
563      BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3);
564      if (BitCodeAbbrevOp::hasEncodingData(E))
565        Abbv->Add(BitCodeAbbrevOp(E, ReadVBR64(5)));
566      else
567        Abbv->Add(BitCodeAbbrevOp(E));
568    }
569    CurAbbrevs.push_back(Abbv);
570  }
571
572public:
573
574  bool ReadBlockInfoBlock() {
575    // If this is the second stream to get to the block info block, skip it.
576    if (BitStream->hasBlockInfoRecords())
577      return SkipBlock();
578
579    if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
580
581    SmallVector<uint64_t, 64> Record;
582    BitstreamReader::BlockInfo *CurBlockInfo = 0;
583
584    // Read all the records for this module.
585    while (1) {
586      unsigned Code = ReadCode();
587      if (Code == bitc::END_BLOCK)
588        return ReadBlockEnd();
589      if (Code == bitc::ENTER_SUBBLOCK) {
590        ReadSubBlockID();
591        if (SkipBlock()) return true;
592        continue;
593      }
594
595      // Read abbrev records, associate them with CurBID.
596      if (Code == bitc::DEFINE_ABBREV) {
597        if (!CurBlockInfo) return true;
598        ReadAbbrevRecord();
599
600        // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
601        // appropriate BlockInfo.
602        BitCodeAbbrev *Abbv = CurAbbrevs.back();
603        CurAbbrevs.pop_back();
604        CurBlockInfo->Abbrevs.push_back(Abbv);
605        continue;
606      }
607
608      // Read a record.
609      Record.clear();
610      switch (ReadRecord(Code, Record)) {
611      default: break;  // Default behavior, ignore unknown content.
612      case bitc::BLOCKINFO_CODE_SETBID:
613        if (Record.size() < 1) return true;
614        CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]);
615        break;
616      case bitc::BLOCKINFO_CODE_BLOCKNAME: {
617        if (!CurBlockInfo) return true;
618        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
619        std::string Name;
620        for (unsigned i = 0, e = Record.size(); i != e; ++i)
621          Name += (char)Record[i];
622        CurBlockInfo->Name = Name;
623        break;
624      }
625      case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
626        if (!CurBlockInfo) return true;
627        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
628        std::string Name;
629        for (unsigned i = 1, e = Record.size(); i != e; ++i)
630          Name += (char)Record[i];
631        CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
632                                                           Name));
633        break;
634      }
635      }
636    }
637  }
638};
639
640} // End llvm namespace
641
642#endif
643