BitstreamReader.h revision d3ff4a188e823baa141c288d31eb532ddf632b6e
1//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This header defines the BitstreamReader class. This class can be used to 11// read an arbitrary bitstream, regardless of its contents. 12// 13//===----------------------------------------------------------------------===// 14 15#ifndef BITSTREAM_READER_H 16#define BITSTREAM_READER_H 17 18#include "llvm/Bitcode/BitCodes.h" 19#include <vector> 20 21namespace llvm { 22 23 class Deserializer; 24 25class BitstreamReader { 26 const unsigned char *NextChar; 27 const unsigned char *LastChar; 28 friend class Deserializer; 29 30 /// CurWord - This is the current data we have pulled from the stream but have 31 /// not returned to the client. 32 uint32_t CurWord; 33 34 /// BitsInCurWord - This is the number of bits in CurWord that are valid. This 35 /// is always from [0...31] inclusive. 36 unsigned BitsInCurWord; 37 38 // CurCodeSize - This is the declared size of code values used for the current 39 // block, in bits. 40 unsigned CurCodeSize; 41 42 /// CurAbbrevs - Abbrevs installed at in this block. 43 std::vector<BitCodeAbbrev*> CurAbbrevs; 44 45 struct Block { 46 unsigned PrevCodeSize; 47 std::vector<BitCodeAbbrev*> PrevAbbrevs; 48 explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} 49 }; 50 51 /// BlockScope - This tracks the codesize of parent blocks. 52 SmallVector<Block, 8> BlockScope; 53 54 /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. 55 /// These describe abbreviations that all blocks of the specified ID inherit. 56 struct BlockInfo { 57 unsigned BlockID; 58 std::vector<BitCodeAbbrev*> Abbrevs; 59 }; 60 std::vector<BlockInfo> BlockInfoRecords; 61 62 /// FirstChar - This remembers the first byte of the stream. 63 const unsigned char *FirstChar; 64public: 65 BitstreamReader() { 66 NextChar = FirstChar = LastChar = 0; 67 CurWord = 0; 68 BitsInCurWord = 0; 69 CurCodeSize = 0; 70 } 71 72 BitstreamReader(const unsigned char *Start, const unsigned char *End) { 73 init(Start, End); 74 } 75 76 void init(const unsigned char *Start, const unsigned char *End) { 77 NextChar = FirstChar = Start; 78 LastChar = End; 79 assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); 80 CurWord = 0; 81 BitsInCurWord = 0; 82 CurCodeSize = 2; 83 } 84 85 ~BitstreamReader() { 86 // Abbrevs could still exist if the stream was broken. If so, don't leak 87 // them. 88 for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size()); 89 i != e; ++i) 90 CurAbbrevs[i]->dropRef(); 91 92 for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size()); 93 S != e; ++S) { 94 std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs; 95 for (unsigned i = 0, e = static_cast<unsigned>(Abbrevs.size()); 96 i != e; ++i) 97 Abbrevs[i]->dropRef(); 98 } 99 100 // Free the BlockInfoRecords. 101 while (!BlockInfoRecords.empty()) { 102 BlockInfo &Info = BlockInfoRecords.back(); 103 // Free blockinfo abbrev info. 104 for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size()); 105 i != e; ++i) 106 Info.Abbrevs[i]->dropRef(); 107 BlockInfoRecords.pop_back(); 108 } 109 } 110 111 bool AtEndOfStream() const { 112 return NextChar == LastChar && BitsInCurWord == 0; 113 } 114 115 /// GetCurrentBitNo - Return the bit # of the bit we are reading. 116 uint64_t GetCurrentBitNo() const { 117 return (NextChar-FirstChar)*8 + ((32-BitsInCurWord) & 31); 118 } 119 120 /// JumpToBit - Reset the stream to the specified bit number. 121 void JumpToBit(uint64_t BitNo) { 122 uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3; 123 uintptr_t WordBitNo = uintptr_t(BitNo) & 31; 124 assert(ByteNo < (uintptr_t)(LastChar-FirstChar) && "Invalid location"); 125 126 // Move the cursor to the right word. 127 NextChar = FirstChar+ByteNo; 128 BitsInCurWord = 0; 129 CurWord = 0; 130 131 // Skip over any bits that are already consumed. 132 if (WordBitNo) { 133 NextChar -= 4; 134 Read(static_cast<unsigned>(WordBitNo)); 135 } 136 } 137 138 /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #. 139 unsigned GetAbbrevIDWidth() const { return CurCodeSize; } 140 141 uint32_t Read(unsigned NumBits) { 142 // If the field is fully contained by CurWord, return it quickly. 143 if (BitsInCurWord >= NumBits) { 144 uint32_t R = CurWord & ((1U << NumBits)-1); 145 CurWord >>= NumBits; 146 BitsInCurWord -= NumBits; 147 return R; 148 } 149 150 // If we run out of data, stop at the end of the stream. 151 if (LastChar == NextChar) { 152 CurWord = 0; 153 BitsInCurWord = 0; 154 return 0; 155 } 156 157 unsigned R = CurWord; 158 159 // Read the next word from the stream. 160 CurWord = (NextChar[0] << 0) | (NextChar[1] << 8) | 161 (NextChar[2] << 16) | (NextChar[3] << 24); 162 NextChar += 4; 163 164 // Extract NumBits-BitsInCurWord from what we just read. 165 unsigned BitsLeft = NumBits-BitsInCurWord; 166 167 // Be careful here, BitsLeft is in the range [1..32] inclusive. 168 R |= (CurWord & (~0U >> (32-BitsLeft))) << BitsInCurWord; 169 170 // BitsLeft bits have just been used up from CurWord. 171 if (BitsLeft != 32) 172 CurWord >>= BitsLeft; 173 else 174 CurWord = 0; 175 BitsInCurWord = 32-BitsLeft; 176 return R; 177 } 178 179 uint64_t Read64(unsigned NumBits) { 180 if (NumBits <= 32) return Read(NumBits); 181 182 uint64_t V = Read(32); 183 return V | (uint64_t)Read(NumBits-32) << 32; 184 } 185 186 uint32_t ReadVBR(unsigned NumBits) { 187 uint32_t Piece = Read(NumBits); 188 if ((Piece & (1U << (NumBits-1))) == 0) 189 return Piece; 190 191 uint32_t Result = 0; 192 unsigned NextBit = 0; 193 while (1) { 194 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 195 196 if ((Piece & (1U << (NumBits-1))) == 0) 197 return Result; 198 199 NextBit += NumBits-1; 200 Piece = Read(NumBits); 201 } 202 } 203 204 uint64_t ReadVBR64(unsigned NumBits) { 205 uint64_t Piece = Read(NumBits); 206 if ((Piece & (1U << (NumBits-1))) == 0) 207 return Piece; 208 209 uint64_t Result = 0; 210 unsigned NextBit = 0; 211 while (1) { 212 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 213 214 if ((Piece & (1U << (NumBits-1))) == 0) 215 return Result; 216 217 NextBit += NumBits-1; 218 Piece = Read(NumBits); 219 } 220 } 221 222 void SkipToWord() { 223 BitsInCurWord = 0; 224 CurWord = 0; 225 } 226 227 228 unsigned ReadCode() { 229 return Read(CurCodeSize); 230 } 231 232 //===--------------------------------------------------------------------===// 233 // Block Manipulation 234 //===--------------------------------------------------------------------===// 235 236private: 237 /// getBlockInfo - If there is block info for the specified ID, return it, 238 /// otherwise return null. 239 BlockInfo *getBlockInfo(unsigned BlockID) { 240 // Common case, the most recent entry matches BlockID. 241 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 242 return &BlockInfoRecords.back(); 243 244 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 245 i != e; ++i) 246 if (BlockInfoRecords[i].BlockID == BlockID) 247 return &BlockInfoRecords[i]; 248 return 0; 249 } 250public: 251 252 253 // Block header: 254 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 255 256 /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for 257 /// the block. 258 unsigned ReadSubBlockID() { 259 return ReadVBR(bitc::BlockIDWidth); 260 } 261 262 /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip 263 /// over the body of this block. If the block record is malformed, return 264 /// true. 265 bool SkipBlock() { 266 // Read and ignore the codelen value. Since we are skipping this block, we 267 // don't care what code widths are used inside of it. 268 ReadVBR(bitc::CodeLenWidth); 269 SkipToWord(); 270 unsigned NumWords = Read(bitc::BlockSizeWidth); 271 272 // Check that the block wasn't partially defined, and that the offset isn't 273 // bogus. 274 if (AtEndOfStream() || NextChar+NumWords*4 > LastChar) 275 return true; 276 277 NextChar += NumWords*4; 278 return false; 279 } 280 281 /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter 282 /// the block, and return true if the block is valid. 283 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0) { 284 // Save the current block's state on BlockScope. 285 BlockScope.push_back(Block(CurCodeSize)); 286 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 287 288 // Add the abbrevs specific to this block to the CurAbbrevs list. 289 if (BlockInfo *Info = getBlockInfo(BlockID)) { 290 for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size()); 291 i != e; ++i) { 292 CurAbbrevs.push_back(Info->Abbrevs[i]); 293 CurAbbrevs.back()->addRef(); 294 } 295 } 296 297 // Get the codesize of this block. 298 CurCodeSize = ReadVBR(bitc::CodeLenWidth); 299 SkipToWord(); 300 unsigned NumWords = Read(bitc::BlockSizeWidth); 301 if (NumWordsP) *NumWordsP = NumWords; 302 303 // Validate that this block is sane. 304 if (CurCodeSize == 0 || AtEndOfStream() || NextChar+NumWords*4 > LastChar) 305 return true; 306 307 return false; 308 } 309 310 bool ReadBlockEnd() { 311 if (BlockScope.empty()) return true; 312 313 // Block tail: 314 // [END_BLOCK, <align4bytes>] 315 SkipToWord(); 316 317 PopBlockScope(); 318 return false; 319 } 320 321private: 322 void PopBlockScope() { 323 CurCodeSize = BlockScope.back().PrevCodeSize; 324 325 // Delete abbrevs from popped scope. 326 for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size()); 327 i != e; ++i) 328 CurAbbrevs[i]->dropRef(); 329 330 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 331 BlockScope.pop_back(); 332 } 333 334 //===--------------------------------------------------------------------===// 335 // Record Processing 336 //===--------------------------------------------------------------------===// 337 338private: 339 void ReadAbbreviatedField(const BitCodeAbbrevOp &Op, 340 SmallVectorImpl<uint64_t> &Vals) { 341 if (Op.isLiteral()) { 342 // If the abbrev specifies the literal value to use, use it. 343 Vals.push_back(Op.getLiteralValue()); 344 } else { 345 // Decode the value as we are commanded. 346 switch (Op.getEncoding()) { 347 default: assert(0 && "Unknown encoding!"); 348 case BitCodeAbbrevOp::Fixed: 349 Vals.push_back(Read((unsigned)Op.getEncodingData())); 350 break; 351 case BitCodeAbbrevOp::VBR: 352 Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData())); 353 break; 354 case BitCodeAbbrevOp::Char6: 355 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6))); 356 break; 357 } 358 } 359 } 360public: 361 unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals) { 362 if (AbbrevID == bitc::UNABBREV_RECORD) { 363 unsigned Code = ReadVBR(6); 364 unsigned NumElts = ReadVBR(6); 365 for (unsigned i = 0; i != NumElts; ++i) 366 Vals.push_back(ReadVBR64(6)); 367 return Code; 368 } 369 370 unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV; 371 assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); 372 BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo]; 373 374 for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { 375 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 376 if (Op.isLiteral() || Op.getEncoding() != BitCodeAbbrevOp::Array) { 377 ReadAbbreviatedField(Op, Vals); 378 } else { 379 // Array case. Read the number of elements as a vbr6. 380 unsigned NumElts = ReadVBR(6); 381 382 // Get the element encoding. 383 assert(i+2 == e && "array op not second to last?"); 384 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 385 386 // Read all the elements. 387 for (; NumElts; --NumElts) 388 ReadAbbreviatedField(EltEnc, Vals); 389 } 390 } 391 392 unsigned Code = (unsigned)Vals[0]; 393 Vals.erase(Vals.begin()); 394 return Code; 395 } 396 397 //===--------------------------------------------------------------------===// 398 // Abbrev Processing 399 //===--------------------------------------------------------------------===// 400 401 void ReadAbbrevRecord() { 402 BitCodeAbbrev *Abbv = new BitCodeAbbrev(); 403 unsigned NumOpInfo = ReadVBR(5); 404 for (unsigned i = 0; i != NumOpInfo; ++i) { 405 bool IsLiteral = Read(1) ? true : false; 406 if (IsLiteral) { 407 Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8))); 408 continue; 409 } 410 411 BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3); 412 if (BitCodeAbbrevOp::hasEncodingData(E)) 413 Abbv->Add(BitCodeAbbrevOp(E, ReadVBR64(5))); 414 else 415 Abbv->Add(BitCodeAbbrevOp(E)); 416 } 417 CurAbbrevs.push_back(Abbv); 418 } 419 420 //===--------------------------------------------------------------------===// 421 // BlockInfo Block Reading 422 //===--------------------------------------------------------------------===// 423 424private: 425 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 426 if (BlockInfo *BI = getBlockInfo(BlockID)) 427 return *BI; 428 429 // Otherwise, add a new record. 430 BlockInfoRecords.push_back(BlockInfo()); 431 BlockInfoRecords.back().BlockID = BlockID; 432 return BlockInfoRecords.back(); 433 } 434 435public: 436 437 bool ReadBlockInfoBlock() { 438 if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true; 439 440 SmallVector<uint64_t, 64> Record; 441 BlockInfo *CurBlockInfo = 0; 442 443 // Read all the records for this module. 444 while (1) { 445 unsigned Code = ReadCode(); 446 if (Code == bitc::END_BLOCK) 447 return ReadBlockEnd(); 448 if (Code == bitc::ENTER_SUBBLOCK) { 449 ReadSubBlockID(); 450 if (SkipBlock()) return true; 451 continue; 452 } 453 454 // Read abbrev records, associate them with CurBID. 455 if (Code == bitc::DEFINE_ABBREV) { 456 if (!CurBlockInfo) return true; 457 ReadAbbrevRecord(); 458 459 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the 460 // appropriate BlockInfo. 461 BitCodeAbbrev *Abbv = CurAbbrevs.back(); 462 CurAbbrevs.pop_back(); 463 CurBlockInfo->Abbrevs.push_back(Abbv); 464 continue; 465 } 466 467 // Read a record. 468 Record.clear(); 469 switch (ReadRecord(Code, Record)) { 470 default: break; // Default behavior, ignore unknown content. 471 case bitc::BLOCKINFO_CODE_SETBID: 472 if (Record.size() < 1) return true; 473 CurBlockInfo = &getOrCreateBlockInfo((unsigned)Record[0]); 474 break; 475 } 476 } 477 } 478}; 479 480} // End llvm namespace 481 482#endif 483