BitstreamReader.h revision 28e4c4c9b3b9bf8939405df24b87062c1f10a9a3
1//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This header defines the BitstreamReader class. This class can be used to 11// read an arbitrary bitstream, regardless of its contents. 12// 13//===----------------------------------------------------------------------===// 14 15#ifndef BITSTREAM_READER_H 16#define BITSTREAM_READER_H 17 18#include "llvm/Bitcode/BitCodes.h" 19#include <vector> 20 21namespace llvm { 22 23class BitstreamReader { 24 const unsigned char *NextChar; 25 const unsigned char *LastChar; 26 27 /// CurWord - This is the current data we have pulled from the stream but have 28 /// not returned to the client. 29 uint32_t CurWord; 30 31 /// BitsInCurWord - This is the number of bits in CurWord that are valid. This 32 /// is always from [0...31] inclusive. 33 unsigned BitsInCurWord; 34 35 // CurCodeSize - This is the declared size of code values used for the current 36 // block, in bits. 37 unsigned CurCodeSize; 38 39 /// CurAbbrevs - Abbrevs installed at in this block. 40 std::vector<BitCodeAbbrev*> CurAbbrevs; 41 42 struct Block { 43 unsigned PrevCodeSize; 44 std::vector<BitCodeAbbrev*> PrevAbbrevs; 45 explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} 46 }; 47 48 /// BlockScope - This tracks the codesize of parent blocks. 49 SmallVector<Block, 8> BlockScope; 50 51 /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. 52 /// These describe abbreviations that all blocks of the specified ID inherit. 53 struct BlockInfo { 54 unsigned BlockID; 55 std::vector<BitCodeAbbrev*> Abbrevs; 56 }; 57 std::vector<BlockInfo> BlockInfoRecords; 58 59 /// FirstChar - This remembers the first byte of the stream. 60 const unsigned char *FirstChar; 61public: 62 BitstreamReader() { 63 NextChar = FirstChar = LastChar = 0; 64 CurWord = 0; 65 BitsInCurWord = 0; 66 CurCodeSize = 0; 67 } 68 69 BitstreamReader(const unsigned char *Start, const unsigned char *End) { 70 init(Start, End); 71 } 72 73 void init(const unsigned char *Start, const unsigned char *End) { 74 NextChar = FirstChar = Start; 75 LastChar = End; 76 assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); 77 CurWord = 0; 78 BitsInCurWord = 0; 79 CurCodeSize = 2; 80 } 81 82 ~BitstreamReader() { 83 // Abbrevs could still exist if the stream was broken. If so, don't leak 84 // them. 85 for (unsigned i = 0, e = CurAbbrevs.size(); i != e; ++i) 86 CurAbbrevs[i]->dropRef(); 87 88 for (unsigned S = 0, e = BlockScope.size(); S != e; ++S) { 89 std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs; 90 for (unsigned i = 0, e = Abbrevs.size(); i != e; ++i) 91 Abbrevs[i]->dropRef(); 92 } 93 94 // Free the BlockInfoRecords. 95 while (!BlockInfoRecords.empty()) { 96 BlockInfo &Info = BlockInfoRecords.back(); 97 // Free blockinfo abbrev info. 98 for (unsigned i = 0, e = Info.Abbrevs.size(); i != e; ++i) 99 Info.Abbrevs[i]->dropRef(); 100 BlockInfoRecords.pop_back(); 101 } 102 } 103 104 bool AtEndOfStream() const { return NextChar == LastChar; } 105 106 /// GetCurrentBitNo - Return the bit # of the bit we are reading. 107 uint64_t GetCurrentBitNo() const { 108 return (NextChar-FirstChar)*8 + (32-BitsInCurWord); 109 } 110 111 /// JumpToBit - Reset the stream to the specified bit number. 112 void JumpToBit(uint64_t BitNo) { 113 unsigned ByteNo = (BitNo/8) & ~3; 114 unsigned WordBitNo = BitNo & 31; 115 assert(ByteNo < (unsigned)(LastChar-FirstChar) && "Invalid location"); 116 117 // Move the cursor to the right word. 118 NextChar = FirstChar+ByteNo; 119 BitsInCurWord = 0; 120 121 // Skip over any bits that are already consumed. 122 if (WordBitNo) { 123 NextChar -= 4; 124 Read(WordBitNo); 125 } 126 } 127 128 /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #. 129 unsigned GetAbbrevIDWidth() const { return CurCodeSize; } 130 131 uint32_t Read(unsigned NumBits) { 132 // If the field is fully contained by CurWord, return it quickly. 133 if (BitsInCurWord >= NumBits) { 134 uint32_t R = CurWord & ((1U << NumBits)-1); 135 CurWord >>= NumBits; 136 BitsInCurWord -= NumBits; 137 return R; 138 } 139 140 // If we run out of data, stop at the end of the stream. 141 if (LastChar == NextChar) { 142 CurWord = 0; 143 BitsInCurWord = 0; 144 return 0; 145 } 146 147 unsigned R = CurWord; 148 149 // Read the next word from the stream. 150 CurWord = (NextChar[0] << 0) | (NextChar[1] << 8) | 151 (NextChar[2] << 16) | (NextChar[3] << 24); 152 NextChar += 4; 153 154 // Extract NumBits-BitsInCurWord from what we just read. 155 unsigned BitsLeft = NumBits-BitsInCurWord; 156 157 // Be careful here, BitsLeft is in the range [1..32] inclusive. 158 R |= (CurWord & (~0U >> (32-BitsLeft))) << BitsInCurWord; 159 160 // BitsLeft bits have just been used up from CurWord. 161 if (BitsLeft != 32) 162 CurWord >>= BitsLeft; 163 else 164 CurWord = 0; 165 BitsInCurWord = 32-BitsLeft; 166 return R; 167 } 168 169 uint64_t Read64(unsigned NumBits) { 170 if (NumBits <= 32) return Read(NumBits); 171 172 uint64_t V = Read(32); 173 return V | (uint64_t)Read(NumBits-32) << 32; 174 } 175 176 uint32_t ReadVBR(unsigned NumBits) { 177 uint32_t Piece = Read(NumBits); 178 if ((Piece & (1U << (NumBits-1))) == 0) 179 return Piece; 180 181 uint32_t Result = 0; 182 unsigned NextBit = 0; 183 while (1) { 184 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 185 186 if ((Piece & (1U << (NumBits-1))) == 0) 187 return Result; 188 189 NextBit += NumBits-1; 190 Piece = Read(NumBits); 191 } 192 } 193 194 uint64_t ReadVBR64(unsigned NumBits) { 195 uint64_t Piece = Read(NumBits); 196 if ((Piece & (1U << (NumBits-1))) == 0) 197 return Piece; 198 199 uint64_t Result = 0; 200 unsigned NextBit = 0; 201 while (1) { 202 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 203 204 if ((Piece & (1U << (NumBits-1))) == 0) 205 return Result; 206 207 NextBit += NumBits-1; 208 Piece = Read(NumBits); 209 } 210 } 211 212 void SkipToWord() { 213 BitsInCurWord = 0; 214 CurWord = 0; 215 } 216 217 218 unsigned ReadCode() { 219 return Read(CurCodeSize); 220 } 221 222 //===--------------------------------------------------------------------===// 223 // Block Manipulation 224 //===--------------------------------------------------------------------===// 225 226private: 227 /// getBlockInfo - If there is block info for the specified ID, return it, 228 /// otherwise return null. 229 BlockInfo *getBlockInfo(unsigned BlockID) { 230 // Common case, the most recent entry matches BlockID. 231 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 232 return &BlockInfoRecords.back(); 233 234 for (unsigned i = 0, e = BlockInfoRecords.size(); i != e; ++i) 235 if (BlockInfoRecords[i].BlockID == BlockID) 236 return &BlockInfoRecords[i]; 237 return 0; 238 } 239public: 240 241 242 // Block header: 243 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 244 245 /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for 246 /// the block. 247 unsigned ReadSubBlockID() { 248 return ReadVBR(bitc::BlockIDWidth); 249 } 250 251 /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip 252 /// over the body of this block. If the block record is malformed, return 253 /// true. 254 bool SkipBlock() { 255 // Read and ignore the codelen value. Since we are skipping this block, we 256 // don't care what code widths are used inside of it. 257 ReadVBR(bitc::CodeLenWidth); 258 SkipToWord(); 259 unsigned NumWords = Read(bitc::BlockSizeWidth); 260 261 // Check that the block wasn't partially defined, and that the offset isn't 262 // bogus. 263 if (AtEndOfStream() || NextChar+NumWords*4 > LastChar) 264 return true; 265 266 NextChar += NumWords*4; 267 return false; 268 } 269 270 /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, read and enter 271 /// the block, returning the BlockID of the block we just entered. 272 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0) { 273 // Save the current block's state on BlockScope. 274 BlockScope.push_back(Block(CurCodeSize)); 275 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 276 277 // Add the abbrevs specific to this block to the CurAbbrevs list. 278 if (BlockInfo *Info = getBlockInfo(BlockID)) { 279 for (unsigned i = 0, e = Info->Abbrevs.size(); i != e; ++i) { 280 CurAbbrevs.push_back(Info->Abbrevs[i]); 281 CurAbbrevs.back()->addRef(); 282 } 283 } 284 285 // Get the codesize of this block. 286 CurCodeSize = ReadVBR(bitc::CodeLenWidth); 287 SkipToWord(); 288 unsigned NumWords = Read(bitc::BlockSizeWidth); 289 if (NumWordsP) *NumWordsP = NumWords; 290 291 // Validate that this block is sane. 292 if (CurCodeSize == 0 || AtEndOfStream() || NextChar+NumWords*4 > LastChar) 293 return true; 294 295 return false; 296 } 297 298 bool ReadBlockEnd() { 299 if (BlockScope.empty()) return true; 300 301 // Block tail: 302 // [END_BLOCK, <align4bytes>] 303 SkipToWord(); 304 CurCodeSize = BlockScope.back().PrevCodeSize; 305 306 // Delete abbrevs from popped scope. 307 for (unsigned i = 0, e = CurAbbrevs.size(); i != e; ++i) 308 CurAbbrevs[i]->dropRef(); 309 310 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 311 BlockScope.pop_back(); 312 return false; 313 } 314 315 //===--------------------------------------------------------------------===// 316 // Record Processing 317 //===--------------------------------------------------------------------===// 318 319private: 320 void ReadAbbreviatedField(const BitCodeAbbrevOp &Op, 321 SmallVectorImpl<uint64_t> &Vals) { 322 if (Op.isLiteral()) { 323 // If the abbrev specifies the literal value to use, use it. 324 Vals.push_back(Op.getLiteralValue()); 325 } else { 326 // Decode the value as we are commanded. 327 switch (Op.getEncoding()) { 328 default: assert(0 && "Unknown encoding!"); 329 case BitCodeAbbrevOp::Fixed: 330 Vals.push_back(Read(Op.getEncodingData())); 331 break; 332 case BitCodeAbbrevOp::VBR: 333 Vals.push_back(ReadVBR64(Op.getEncodingData())); 334 break; 335 case BitCodeAbbrevOp::Char6: 336 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6))); 337 break; 338 } 339 } 340 } 341public: 342 unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals) { 343 if (AbbrevID == bitc::UNABBREV_RECORD) { 344 unsigned Code = ReadVBR(6); 345 unsigned NumElts = ReadVBR(6); 346 for (unsigned i = 0; i != NumElts; ++i) 347 Vals.push_back(ReadVBR64(6)); 348 return Code; 349 } 350 351 unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV; 352 assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); 353 BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo]; 354 355 for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { 356 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 357 if (Op.isLiteral() || Op.getEncoding() != BitCodeAbbrevOp::Array) { 358 ReadAbbreviatedField(Op, Vals); 359 } else { 360 // Array case. Read the number of elements as a vbr6. 361 unsigned NumElts = ReadVBR(6); 362 363 // Get the element encoding. 364 assert(i+2 == e && "array op not second to last?"); 365 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 366 367 // Read all the elements. 368 for (; NumElts; --NumElts) 369 ReadAbbreviatedField(EltEnc, Vals); 370 } 371 } 372 373 unsigned Code = Vals[0]; 374 Vals.erase(Vals.begin()); 375 return Code; 376 } 377 378 //===--------------------------------------------------------------------===// 379 // Abbrev Processing 380 //===--------------------------------------------------------------------===// 381 382 void ReadAbbrevRecord() { 383 BitCodeAbbrev *Abbv = new BitCodeAbbrev(); 384 unsigned NumOpInfo = ReadVBR(5); 385 for (unsigned i = 0; i != NumOpInfo; ++i) { 386 bool IsLiteral = Read(1); 387 if (IsLiteral) { 388 Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8))); 389 continue; 390 } 391 392 BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3); 393 if (BitCodeAbbrevOp::hasEncodingData(E)) 394 Abbv->Add(BitCodeAbbrevOp(E, ReadVBR64(5))); 395 else 396 Abbv->Add(BitCodeAbbrevOp(E)); 397 } 398 CurAbbrevs.push_back(Abbv); 399 } 400 401 //===--------------------------------------------------------------------===// 402 // BlockInfo Block Reading 403 //===--------------------------------------------------------------------===// 404 405private: 406 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 407 if (BlockInfo *BI = getBlockInfo(BlockID)) 408 return *BI; 409 410 // Otherwise, add a new record. 411 BlockInfoRecords.push_back(BlockInfo()); 412 BlockInfoRecords.back().BlockID = BlockID; 413 return BlockInfoRecords.back(); 414 } 415 416public: 417 418 bool ReadBlockInfoBlock() { 419 if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true; 420 421 SmallVector<uint64_t, 64> Record; 422 BlockInfo *CurBlockInfo = 0; 423 424 // Read all the records for this module. 425 while (1) { 426 unsigned Code = ReadCode(); 427 if (Code == bitc::END_BLOCK) 428 return ReadBlockEnd(); 429 if (Code == bitc::ENTER_SUBBLOCK) { 430 ReadSubBlockID(); 431 if (SkipBlock()) return true; 432 continue; 433 } 434 435 // Read abbrev records, associate them with CurBID. 436 if (Code == bitc::DEFINE_ABBREV) { 437 if (!CurBlockInfo) return true; 438 ReadAbbrevRecord(); 439 440 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the 441 // appropriate BlockInfo. 442 BitCodeAbbrev *Abbv = CurAbbrevs.back(); 443 CurAbbrevs.pop_back(); 444 CurBlockInfo->Abbrevs.push_back(Abbv); 445 continue; 446 } 447 448 // Read a record. 449 switch (ReadRecord(Code, Record)) { 450 default: break; // Default behavior, ignore unknown content. 451 case bitc::BLOCKINFO_CODE_SETBID: 452 if (Record.size() < 1) return true; 453 CurBlockInfo = &getOrCreateBlockInfo(Record[0]); 454 break; 455 } 456 } 457 } 458}; 459 460} // End llvm namespace 461 462#endif 463