BitstreamWriter.h revision 0648f54b889e0f627172acaa1d622f8d3d9f5409
1//===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This header defines the BitstreamWriter class. This class can be used to 11// write an arbitrary bitstream, regardless of its contents. 12// 13//===----------------------------------------------------------------------===// 14 15#ifndef BITSTREAM_WRITER_H 16#define BITSTREAM_WRITER_H 17 18#include "llvm/Bitcode/BitCodes.h" 19#include <vector> 20 21namespace llvm { 22 23class BitstreamWriter { 24 std::vector<unsigned char> &Out; 25 26 /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use. 27 unsigned CurBit; 28 29 /// CurValue - The current value. Only bits < CurBit are valid. 30 uint32_t CurValue; 31 32 /// CurCodeSize - This is the declared size of code values used for the 33 /// current block, in bits. 34 unsigned CurCodeSize; 35 36 /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently 37 /// selected BLOCK ID. 38 unsigned BlockInfoCurBID; 39 40 /// CurAbbrevs - Abbrevs installed at in this block. 41 std::vector<BitCodeAbbrev*> CurAbbrevs; 42 43 struct Block { 44 unsigned PrevCodeSize; 45 unsigned StartSizeWord; 46 std::vector<BitCodeAbbrev*> PrevAbbrevs; 47 Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {} 48 }; 49 50 /// BlockScope - This tracks the current blocks that we have entered. 51 std::vector<Block> BlockScope; 52 53 /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. 54 /// These describe abbreviations that all blocks of the specified ID inherit. 55 struct BlockInfo { 56 unsigned BlockID; 57 std::vector<BitCodeAbbrev*> Abbrevs; 58 }; 59 std::vector<BlockInfo> BlockInfoRecords; 60 61public: 62 explicit BitstreamWriter(std::vector<unsigned char> &O) 63 : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {} 64 65 ~BitstreamWriter() { 66 assert(CurBit == 0 && "Unflused data remaining"); 67 assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance"); 68 69 // Free the BlockInfoRecords. 70 while (!BlockInfoRecords.empty()) { 71 BlockInfo &Info = BlockInfoRecords.back(); 72 // Free blockinfo abbrev info. 73 for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size()); 74 i != e; ++i) 75 Info.Abbrevs[i]->dropRef(); 76 BlockInfoRecords.pop_back(); 77 } 78 } 79 80 std::vector<unsigned char> &getBuffer() { return Out; } 81 82 /// \brief Retrieve the current position in the stream, in bits. 83 uint64_t GetCurrentBitNo() const { return Out.size() * 8 + CurBit; } 84 85 //===--------------------------------------------------------------------===// 86 // Basic Primitives for emitting bits to the stream. 87 //===--------------------------------------------------------------------===// 88 89 void Emit(uint32_t Val, unsigned NumBits) { 90 assert(NumBits <= 32 && "Invalid value size!"); 91 assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); 92 CurValue |= Val << CurBit; 93 if (CurBit + NumBits < 32) { 94 CurBit += NumBits; 95 return; 96 } 97 98 // Add the current word. 99 unsigned V = CurValue; 100 Out.push_back((unsigned char)(V >> 0)); 101 Out.push_back((unsigned char)(V >> 8)); 102 Out.push_back((unsigned char)(V >> 16)); 103 Out.push_back((unsigned char)(V >> 24)); 104 105 if (CurBit) 106 CurValue = Val >> (32-CurBit); 107 else 108 CurValue = 0; 109 CurBit = (CurBit+NumBits) & 31; 110 } 111 112 void Emit64(uint64_t Val, unsigned NumBits) { 113 if (NumBits <= 32) 114 Emit((uint32_t)Val, NumBits); 115 else { 116 Emit((uint32_t)Val, 32); 117 Emit((uint32_t)(Val >> 32), NumBits-32); 118 } 119 } 120 121 void FlushToWord() { 122 if (CurBit) { 123 unsigned V = CurValue; 124 Out.push_back((unsigned char)(V >> 0)); 125 Out.push_back((unsigned char)(V >> 8)); 126 Out.push_back((unsigned char)(V >> 16)); 127 Out.push_back((unsigned char)(V >> 24)); 128 CurBit = 0; 129 CurValue = 0; 130 } 131 } 132 133 void EmitVBR(uint32_t Val, unsigned NumBits) { 134 uint32_t Threshold = 1U << (NumBits-1); 135 136 // Emit the bits with VBR encoding, NumBits-1 bits at a time. 137 while (Val >= Threshold) { 138 Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits); 139 Val >>= NumBits-1; 140 } 141 142 Emit(Val, NumBits); 143 } 144 145 void EmitVBR64(uint64_t Val, unsigned NumBits) { 146 if ((uint32_t)Val == Val) 147 return EmitVBR((uint32_t)Val, NumBits); 148 149 uint64_t Threshold = 1U << (NumBits-1); 150 151 // Emit the bits with VBR encoding, NumBits-1 bits at a time. 152 while (Val >= Threshold) { 153 Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) | 154 (1 << (NumBits-1)), NumBits); 155 Val >>= NumBits-1; 156 } 157 158 Emit((uint32_t)Val, NumBits); 159 } 160 161 /// EmitCode - Emit the specified code. 162 void EmitCode(unsigned Val) { 163 Emit(Val, CurCodeSize); 164 } 165 166 // BackpatchWord - Backpatch a 32-bit word in the output with the specified 167 // value. 168 void BackpatchWord(unsigned ByteNo, unsigned NewWord) { 169 Out[ByteNo++] = (unsigned char)(NewWord >> 0); 170 Out[ByteNo++] = (unsigned char)(NewWord >> 8); 171 Out[ByteNo++] = (unsigned char)(NewWord >> 16); 172 Out[ByteNo ] = (unsigned char)(NewWord >> 24); 173 } 174 175 //===--------------------------------------------------------------------===// 176 // Block Manipulation 177 //===--------------------------------------------------------------------===// 178 179 /// getBlockInfo - If there is block info for the specified ID, return it, 180 /// otherwise return null. 181 BlockInfo *getBlockInfo(unsigned BlockID) { 182 // Common case, the most recent entry matches BlockID. 183 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 184 return &BlockInfoRecords.back(); 185 186 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 187 i != e; ++i) 188 if (BlockInfoRecords[i].BlockID == BlockID) 189 return &BlockInfoRecords[i]; 190 return 0; 191 } 192 193 void EnterSubblock(unsigned BlockID, unsigned CodeLen) { 194 // Block header: 195 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 196 EmitCode(bitc::ENTER_SUBBLOCK); 197 EmitVBR(BlockID, bitc::BlockIDWidth); 198 EmitVBR(CodeLen, bitc::CodeLenWidth); 199 FlushToWord(); 200 201 unsigned BlockSizeWordLoc = static_cast<unsigned>(Out.size()); 202 unsigned OldCodeSize = CurCodeSize; 203 204 // Emit a placeholder, which will be replaced when the block is popped. 205 Emit(0, bitc::BlockSizeWidth); 206 207 CurCodeSize = CodeLen; 208 209 // Push the outer block's abbrev set onto the stack, start out with an 210 // empty abbrev set. 211 BlockScope.push_back(Block(OldCodeSize, BlockSizeWordLoc/4)); 212 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 213 214 // If there is a blockinfo for this BlockID, add all the predefined abbrevs 215 // to the abbrev list. 216 if (BlockInfo *Info = getBlockInfo(BlockID)) { 217 for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size()); 218 i != e; ++i) { 219 CurAbbrevs.push_back(Info->Abbrevs[i]); 220 Info->Abbrevs[i]->addRef(); 221 } 222 } 223 } 224 225 void ExitBlock() { 226 assert(!BlockScope.empty() && "Block scope imbalance!"); 227 228 // Delete all abbrevs. 229 for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size()); 230 i != e; ++i) 231 CurAbbrevs[i]->dropRef(); 232 233 const Block &B = BlockScope.back(); 234 235 // Block tail: 236 // [END_BLOCK, <align4bytes>] 237 EmitCode(bitc::END_BLOCK); 238 FlushToWord(); 239 240 // Compute the size of the block, in words, not counting the size field. 241 unsigned SizeInWords= static_cast<unsigned>(Out.size())/4-B.StartSizeWord-1; 242 unsigned ByteNo = B.StartSizeWord*4; 243 244 // Update the block size field in the header of this sub-block. 245 BackpatchWord(ByteNo, SizeInWords); 246 247 // Restore the inner block's code size and abbrev table. 248 CurCodeSize = B.PrevCodeSize; 249 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 250 BlockScope.pop_back(); 251 } 252 253 //===--------------------------------------------------------------------===// 254 // Record Emission 255 //===--------------------------------------------------------------------===// 256 257private: 258 /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev 259 /// record. This is a no-op, since the abbrev specifies the literal to use. 260 template<typename uintty> 261 void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) { 262 assert(Op.isLiteral() && "Not a literal"); 263 // If the abbrev specifies the literal value to use, don't emit 264 // anything. 265 assert(V == Op.getLiteralValue() && 266 "Invalid abbrev for record!"); 267 } 268 269 /// EmitAbbreviatedField - Emit a single scalar field value with the specified 270 /// encoding. 271 template<typename uintty> 272 void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) { 273 assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!"); 274 275 // Encode the value as we are commanded. 276 switch (Op.getEncoding()) { 277 default: assert(0 && "Unknown encoding!"); 278 case BitCodeAbbrevOp::Fixed: 279 Emit((unsigned)V, (unsigned)Op.getEncodingData()); 280 break; 281 case BitCodeAbbrevOp::VBR: 282 EmitVBR64(V, (unsigned)Op.getEncodingData()); 283 break; 284 case BitCodeAbbrevOp::Char6: 285 Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6); 286 break; 287 } 288 } 289 290 /// EmitRecordWithAbbrevImpl - This is the core implementation of the record 291 /// emission code. If BlobData is non-null, then it specifies an array of 292 /// data that should be emitted as part of the Blob or Array operand that is 293 /// known to exist at the end of the the record. 294 template<typename uintty> 295 void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 296 const char *BlobData, unsigned BlobLen) { 297 unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV; 298 assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); 299 BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo]; 300 301 EmitCode(Abbrev); 302 303 unsigned RecordIdx = 0; 304 for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); 305 i != e; ++i) { 306 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 307 if (Op.isLiteral()) { 308 assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); 309 EmitAbbreviatedLiteral(Op, Vals[RecordIdx]); 310 ++RecordIdx; 311 } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 312 // Array case. 313 assert(i+2 == e && "array op not second to last?"); 314 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 315 316 // If this record has blob data, emit it, otherwise we must have record 317 // entries to encode this way. 318 if (BlobData) { 319 assert(RecordIdx == Vals.size() && 320 "Blob data and record entries specified for array!"); 321 // Emit a vbr6 to indicate the number of elements present. 322 EmitVBR(static_cast<uint32_t>(BlobLen), 6); 323 324 // Emit each field. 325 for (unsigned i = 0; i != BlobLen; ++i) 326 EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]); 327 328 // Know that blob data is consumed for assertion below. 329 BlobData = 0; 330 } else { 331 // Emit a vbr6 to indicate the number of elements present. 332 EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); 333 334 // Emit each field. 335 for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) 336 EmitAbbreviatedField(EltEnc, Vals[RecordIdx]); 337 } 338 } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) { 339 // If this record has blob data, emit it, otherwise we must have record 340 // entries to encode this way. 341 342 // Emit a vbr6 to indicate the number of elements present. 343 if (BlobData) { 344 EmitVBR(static_cast<uint32_t>(BlobLen), 6); 345 assert(RecordIdx == Vals.size() && 346 "Blob data and record entries specified for blob operand!"); 347 } else { 348 EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); 349 } 350 351 // Flush to a 32-bit alignment boundary. 352 FlushToWord(); 353 assert((Out.size() & 3) == 0 && "Not 32-bit aligned"); 354 355 // Emit each field as a literal byte. 356 if (BlobData) { 357 for (unsigned i = 0; i != BlobLen; ++i) 358 Out.push_back((unsigned char)BlobData[i]); 359 360 // Know that blob data is consumed for assertion below. 361 BlobData = 0; 362 } else { 363 for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) { 364 assert(Vals[RecordIdx] < 256 && "Value too large to emit as blob"); 365 Out.push_back((unsigned char)Vals[RecordIdx]); 366 } 367 } 368 // Align end to 32-bits. 369 while (Out.size() & 3) 370 Out.push_back(0); 371 372 } else { // Single scalar field. 373 assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); 374 EmitAbbreviatedField(Op, Vals[RecordIdx]); 375 ++RecordIdx; 376 } 377 } 378 assert(RecordIdx == Vals.size() && "Not all record operands emitted!"); 379 assert(BlobData == 0 && 380 "Blob data specified for record that doesn't use it!"); 381 } 382 383public: 384 385 /// EmitRecord - Emit the specified record to the stream, using an abbrev if 386 /// we have one to compress the output. 387 template<typename uintty> 388 void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals, 389 unsigned Abbrev = 0) { 390 if (!Abbrev) { 391 // If we don't have an abbrev to use, emit this in its fully unabbreviated 392 // form. 393 EmitCode(bitc::UNABBREV_RECORD); 394 EmitVBR(Code, 6); 395 EmitVBR(static_cast<uint32_t>(Vals.size()), 6); 396 for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i) 397 EmitVBR64(Vals[i], 6); 398 return; 399 } 400 401 // Insert the code into Vals to treat it uniformly. 402 Vals.insert(Vals.begin(), Code); 403 404 EmitRecordWithAbbrev(Abbrev, Vals); 405 } 406 407 /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation. 408 /// Unlike EmitRecord, the code for the record should be included in Vals as 409 /// the first entry. 410 template<typename uintty> 411 void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) { 412 EmitRecordWithAbbrevImpl(Abbrev, Vals, 0, 0); 413 } 414 415 /// EmitRecordWithBlob - Emit the specified record to the stream, using an 416 /// abbrev that includes a blob at the end. The blob data to emit is 417 /// specified by the pointer and length specified at the end. In contrast to 418 /// EmitRecord, this routine expects that the first entry in Vals is the code 419 /// of the record. 420 template<typename uintty> 421 void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 422 const char *BlobData, unsigned BlobLen) { 423 EmitRecordWithAbbrevImpl(Abbrev, Vals, BlobData, BlobLen); 424 } 425 426 /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records 427 /// that end with an array. 428 template<typename uintty> 429 void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 430 const char *ArrayData, unsigned ArrayLen) { 431 EmitRecordWithAbbrevImpl(Abbrev, Vals, ArrayData, ArrayLen); 432 } 433 434 //===--------------------------------------------------------------------===// 435 // Abbrev Emission 436 //===--------------------------------------------------------------------===// 437 438private: 439 // Emit the abbreviation as a DEFINE_ABBREV record. 440 void EncodeAbbrev(BitCodeAbbrev *Abbv) { 441 EmitCode(bitc::DEFINE_ABBREV); 442 EmitVBR(Abbv->getNumOperandInfos(), 5); 443 for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); 444 i != e; ++i) { 445 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 446 Emit(Op.isLiteral(), 1); 447 if (Op.isLiteral()) { 448 EmitVBR64(Op.getLiteralValue(), 8); 449 } else { 450 Emit(Op.getEncoding(), 3); 451 if (Op.hasEncodingData()) 452 EmitVBR64(Op.getEncodingData(), 5); 453 } 454 } 455 } 456public: 457 458 /// EmitAbbrev - This emits an abbreviation to the stream. Note that this 459 /// method takes ownership of the specified abbrev. 460 unsigned EmitAbbrev(BitCodeAbbrev *Abbv) { 461 // Emit the abbreviation as a record. 462 EncodeAbbrev(Abbv); 463 CurAbbrevs.push_back(Abbv); 464 return static_cast<unsigned>(CurAbbrevs.size())-1 + 465 bitc::FIRST_APPLICATION_ABBREV; 466 } 467 468 //===--------------------------------------------------------------------===// 469 // BlockInfo Block Emission 470 //===--------------------------------------------------------------------===// 471 472 /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK. 473 void EnterBlockInfoBlock(unsigned CodeWidth) { 474 EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth); 475 BlockInfoCurBID = -1U; 476 } 477private: 478 /// SwitchToBlockID - If we aren't already talking about the specified block 479 /// ID, emit a BLOCKINFO_CODE_SETBID record. 480 void SwitchToBlockID(unsigned BlockID) { 481 if (BlockInfoCurBID == BlockID) return; 482 SmallVector<unsigned, 2> V; 483 V.push_back(BlockID); 484 EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V); 485 BlockInfoCurBID = BlockID; 486 } 487 488 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 489 if (BlockInfo *BI = getBlockInfo(BlockID)) 490 return *BI; 491 492 // Otherwise, add a new record. 493 BlockInfoRecords.push_back(BlockInfo()); 494 BlockInfoRecords.back().BlockID = BlockID; 495 return BlockInfoRecords.back(); 496 } 497 498public: 499 500 /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified 501 /// BlockID. 502 unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) { 503 SwitchToBlockID(BlockID); 504 EncodeAbbrev(Abbv); 505 506 // Add the abbrev to the specified block record. 507 BlockInfo &Info = getOrCreateBlockInfo(BlockID); 508 Info.Abbrevs.push_back(Abbv); 509 510 return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV; 511 } 512}; 513 514 515} // End llvm namespace 516 517#endif 518