1 //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This header defines the BitstreamWriter class. This class can be used to 11 // write an arbitrary bitstream, regardless of its contents. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_BITCODE_BITSTREAMWRITER_H 16 #define LLVM_BITCODE_BITSTREAMWRITER_H 17 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Bitcode/BitCodes.h" 21 #include "llvm/Support/Endian.h" 22 #include <vector> 23 24 namespace llvm { 25 26 class BitstreamWriter { 27 SmallVectorImpl<char> &Out; 28 29 /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use. 30 unsigned CurBit; 31 32 /// CurValue - The current value. Only bits < CurBit are valid. 33 uint32_t CurValue; 34 35 /// CurCodeSize - This is the declared size of code values used for the 36 /// current block, in bits. 37 unsigned CurCodeSize; 38 39 /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently 40 /// selected BLOCK ID. 41 unsigned BlockInfoCurBID; 42 43 /// CurAbbrevs - Abbrevs installed at in this block. 44 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs; 45 46 struct Block { 47 unsigned PrevCodeSize; 48 unsigned StartSizeWord; 49 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs; BlockBlock50 Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {} 51 }; 52 53 /// BlockScope - This tracks the current blocks that we have entered. 54 std::vector<Block> BlockScope; 55 56 /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. 57 /// These describe abbreviations that all blocks of the specified ID inherit. 58 struct BlockInfo { 59 unsigned BlockID; 60 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs; 61 }; 62 std::vector<BlockInfo> BlockInfoRecords; 63 64 // BackpatchWord - Backpatch a 32-bit word in the output with the specified 65 // value. BackpatchWord(unsigned ByteNo,unsigned NewWord)66 void BackpatchWord(unsigned ByteNo, unsigned NewWord) { 67 support::endian::write32le(&Out[ByteNo], NewWord); 68 } 69 WriteByte(unsigned char Value)70 void WriteByte(unsigned char Value) { 71 Out.push_back(Value); 72 } 73 WriteWord(unsigned Value)74 void WriteWord(unsigned Value) { 75 Value = support::endian::byte_swap<uint32_t, support::little>(Value); 76 Out.append(reinterpret_cast<const char *>(&Value), 77 reinterpret_cast<const char *>(&Value + 1)); 78 } 79 GetBufferOffset()80 unsigned GetBufferOffset() const { 81 return Out.size(); 82 } 83 GetWordIndex()84 unsigned GetWordIndex() const { 85 unsigned Offset = GetBufferOffset(); 86 assert((Offset & 3) == 0 && "Not 32-bit aligned"); 87 return Offset / 4; 88 } 89 90 public: BitstreamWriter(SmallVectorImpl<char> & O)91 explicit BitstreamWriter(SmallVectorImpl<char> &O) 92 : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {} 93 ~BitstreamWriter()94 ~BitstreamWriter() { 95 assert(CurBit == 0 && "Unflushed data remaining"); 96 assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance"); 97 } 98 99 /// \brief Retrieve the current position in the stream, in bits. GetCurrentBitNo()100 uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; } 101 102 //===--------------------------------------------------------------------===// 103 // Basic Primitives for emitting bits to the stream. 104 //===--------------------------------------------------------------------===// 105 Emit(uint32_t Val,unsigned NumBits)106 void Emit(uint32_t Val, unsigned NumBits) { 107 assert(NumBits && NumBits <= 32 && "Invalid value size!"); 108 assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); 109 CurValue |= Val << CurBit; 110 if (CurBit + NumBits < 32) { 111 CurBit += NumBits; 112 return; 113 } 114 115 // Add the current word. 116 WriteWord(CurValue); 117 118 if (CurBit) 119 CurValue = Val >> (32-CurBit); 120 else 121 CurValue = 0; 122 CurBit = (CurBit+NumBits) & 31; 123 } 124 Emit64(uint64_t Val,unsigned NumBits)125 void Emit64(uint64_t Val, unsigned NumBits) { 126 if (NumBits <= 32) 127 Emit((uint32_t)Val, NumBits); 128 else { 129 Emit((uint32_t)Val, 32); 130 Emit((uint32_t)(Val >> 32), NumBits-32); 131 } 132 } 133 FlushToWord()134 void FlushToWord() { 135 if (CurBit) { 136 WriteWord(CurValue); 137 CurBit = 0; 138 CurValue = 0; 139 } 140 } 141 EmitVBR(uint32_t Val,unsigned NumBits)142 void EmitVBR(uint32_t Val, unsigned NumBits) { 143 assert(NumBits <= 32 && "Too many bits to emit!"); 144 uint32_t Threshold = 1U << (NumBits-1); 145 146 // Emit the bits with VBR encoding, NumBits-1 bits at a time. 147 while (Val >= Threshold) { 148 Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits); 149 Val >>= NumBits-1; 150 } 151 152 Emit(Val, NumBits); 153 } 154 EmitVBR64(uint64_t Val,unsigned NumBits)155 void EmitVBR64(uint64_t Val, unsigned NumBits) { 156 assert(NumBits <= 32 && "Too many bits to emit!"); 157 if ((uint32_t)Val == Val) 158 return EmitVBR((uint32_t)Val, NumBits); 159 160 uint32_t Threshold = 1U << (NumBits-1); 161 162 // Emit the bits with VBR encoding, NumBits-1 bits at a time. 163 while (Val >= Threshold) { 164 Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) | 165 (1 << (NumBits-1)), NumBits); 166 Val >>= NumBits-1; 167 } 168 169 Emit((uint32_t)Val, NumBits); 170 } 171 172 /// EmitCode - Emit the specified code. EmitCode(unsigned Val)173 void EmitCode(unsigned Val) { 174 Emit(Val, CurCodeSize); 175 } 176 177 //===--------------------------------------------------------------------===// 178 // Block Manipulation 179 //===--------------------------------------------------------------------===// 180 181 /// getBlockInfo - If there is block info for the specified ID, return it, 182 /// otherwise return null. getBlockInfo(unsigned BlockID)183 BlockInfo *getBlockInfo(unsigned BlockID) { 184 // Common case, the most recent entry matches BlockID. 185 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 186 return &BlockInfoRecords.back(); 187 188 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 189 i != e; ++i) 190 if (BlockInfoRecords[i].BlockID == BlockID) 191 return &BlockInfoRecords[i]; 192 return nullptr; 193 } 194 EnterSubblock(unsigned BlockID,unsigned CodeLen)195 void EnterSubblock(unsigned BlockID, unsigned CodeLen) { 196 // Block header: 197 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 198 EmitCode(bitc::ENTER_SUBBLOCK); 199 EmitVBR(BlockID, bitc::BlockIDWidth); 200 EmitVBR(CodeLen, bitc::CodeLenWidth); 201 FlushToWord(); 202 203 unsigned BlockSizeWordIndex = GetWordIndex(); 204 unsigned OldCodeSize = CurCodeSize; 205 206 // Emit a placeholder, which will be replaced when the block is popped. 207 Emit(0, bitc::BlockSizeWidth); 208 209 CurCodeSize = CodeLen; 210 211 // Push the outer block's abbrev set onto the stack, start out with an 212 // empty abbrev set. 213 BlockScope.emplace_back(OldCodeSize, BlockSizeWordIndex); 214 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 215 216 // If there is a blockinfo for this BlockID, add all the predefined abbrevs 217 // to the abbrev list. 218 if (BlockInfo *Info = getBlockInfo(BlockID)) { 219 CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(), 220 Info->Abbrevs.end()); 221 } 222 } 223 ExitBlock()224 void ExitBlock() { 225 assert(!BlockScope.empty() && "Block scope imbalance!"); 226 const Block &B = BlockScope.back(); 227 228 // Block tail: 229 // [END_BLOCK, <align4bytes>] 230 EmitCode(bitc::END_BLOCK); 231 FlushToWord(); 232 233 // Compute the size of the block, in words, not counting the size field. 234 unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1; 235 unsigned ByteNo = B.StartSizeWord*4; 236 237 // Update the block size field in the header of this sub-block. 238 BackpatchWord(ByteNo, SizeInWords); 239 240 // Restore the inner block's code size and abbrev table. 241 CurCodeSize = B.PrevCodeSize; 242 CurAbbrevs = std::move(B.PrevAbbrevs); 243 BlockScope.pop_back(); 244 } 245 246 //===--------------------------------------------------------------------===// 247 // Record Emission 248 //===--------------------------------------------------------------------===// 249 250 private: 251 /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev 252 /// record. This is a no-op, since the abbrev specifies the literal to use. 253 template<typename uintty> EmitAbbreviatedLiteral(const BitCodeAbbrevOp & Op,uintty V)254 void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) { 255 assert(Op.isLiteral() && "Not a literal"); 256 // If the abbrev specifies the literal value to use, don't emit 257 // anything. 258 assert(V == Op.getLiteralValue() && 259 "Invalid abbrev for record!"); 260 } 261 262 /// EmitAbbreviatedField - Emit a single scalar field value with the specified 263 /// encoding. 264 template<typename uintty> EmitAbbreviatedField(const BitCodeAbbrevOp & Op,uintty V)265 void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) { 266 assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!"); 267 268 // Encode the value as we are commanded. 269 switch (Op.getEncoding()) { 270 default: llvm_unreachable("Unknown encoding!"); 271 case BitCodeAbbrevOp::Fixed: 272 if (Op.getEncodingData()) 273 Emit((unsigned)V, (unsigned)Op.getEncodingData()); 274 break; 275 case BitCodeAbbrevOp::VBR: 276 if (Op.getEncodingData()) 277 EmitVBR64(V, (unsigned)Op.getEncodingData()); 278 break; 279 case BitCodeAbbrevOp::Char6: 280 Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6); 281 break; 282 } 283 } 284 285 /// EmitRecordWithAbbrevImpl - This is the core implementation of the record 286 /// emission code. If BlobData is non-null, then it specifies an array of 287 /// data that should be emitted as part of the Blob or Array operand that is 288 /// known to exist at the end of the record. 289 template<typename uintty> EmitRecordWithAbbrevImpl(unsigned Abbrev,SmallVectorImpl<uintty> & Vals,StringRef Blob)290 void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 291 StringRef Blob) { 292 const char *BlobData = Blob.data(); 293 unsigned BlobLen = (unsigned) Blob.size(); 294 unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV; 295 assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); 296 const BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo].get(); 297 298 EmitCode(Abbrev); 299 300 unsigned RecordIdx = 0; 301 for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); 302 i != e; ++i) { 303 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 304 if (Op.isLiteral()) { 305 assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); 306 EmitAbbreviatedLiteral(Op, Vals[RecordIdx]); 307 ++RecordIdx; 308 } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 309 // Array case. 310 assert(i+2 == e && "array op not second to last?"); 311 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 312 313 // If this record has blob data, emit it, otherwise we must have record 314 // entries to encode this way. 315 if (BlobData) { 316 assert(RecordIdx == Vals.size() && 317 "Blob data and record entries specified for array!"); 318 // Emit a vbr6 to indicate the number of elements present. 319 EmitVBR(static_cast<uint32_t>(BlobLen), 6); 320 321 // Emit each field. 322 for (unsigned i = 0; i != BlobLen; ++i) 323 EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]); 324 325 // Know that blob data is consumed for assertion below. 326 BlobData = nullptr; 327 } else { 328 // Emit a vbr6 to indicate the number of elements present. 329 EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); 330 331 // Emit each field. 332 for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) 333 EmitAbbreviatedField(EltEnc, Vals[RecordIdx]); 334 } 335 } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) { 336 // If this record has blob data, emit it, otherwise we must have record 337 // entries to encode this way. 338 339 // Emit a vbr6 to indicate the number of elements present. 340 if (BlobData) { 341 EmitVBR(static_cast<uint32_t>(BlobLen), 6); 342 assert(RecordIdx == Vals.size() && 343 "Blob data and record entries specified for blob operand!"); 344 } else { 345 EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); 346 } 347 348 // Flush to a 32-bit alignment boundary. 349 FlushToWord(); 350 351 // Emit each field as a literal byte. 352 if (BlobData) { 353 for (unsigned i = 0; i != BlobLen; ++i) 354 WriteByte((unsigned char)BlobData[i]); 355 356 // Know that blob data is consumed for assertion below. 357 BlobData = nullptr; 358 } else { 359 for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) { 360 assert(isUInt<8>(Vals[RecordIdx]) && 361 "Value too large to emit as blob"); 362 WriteByte((unsigned char)Vals[RecordIdx]); 363 } 364 } 365 366 // Align end to 32-bits. 367 while (GetBufferOffset() & 3) 368 WriteByte(0); 369 } else { // Single scalar field. 370 assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); 371 EmitAbbreviatedField(Op, Vals[RecordIdx]); 372 ++RecordIdx; 373 } 374 } 375 assert(RecordIdx == Vals.size() && "Not all record operands emitted!"); 376 assert(BlobData == nullptr && 377 "Blob data specified for record that doesn't use it!"); 378 } 379 380 public: 381 382 /// EmitRecord - Emit the specified record to the stream, using an abbrev if 383 /// we have one to compress the output. 384 template<typename uintty> 385 void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals, 386 unsigned Abbrev = 0) { 387 if (!Abbrev) { 388 // If we don't have an abbrev to use, emit this in its fully unabbreviated 389 // form. 390 EmitCode(bitc::UNABBREV_RECORD); 391 EmitVBR(Code, 6); 392 EmitVBR(static_cast<uint32_t>(Vals.size()), 6); 393 for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i) 394 EmitVBR64(Vals[i], 6); 395 return; 396 } 397 398 // Insert the code into Vals to treat it uniformly. 399 Vals.insert(Vals.begin(), Code); 400 401 EmitRecordWithAbbrev(Abbrev, Vals); 402 } 403 404 /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation. 405 /// Unlike EmitRecord, the code for the record should be included in Vals as 406 /// the first entry. 407 template<typename uintty> EmitRecordWithAbbrev(unsigned Abbrev,SmallVectorImpl<uintty> & Vals)408 void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) { 409 EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef()); 410 } 411 412 /// EmitRecordWithBlob - Emit the specified record to the stream, using an 413 /// abbrev that includes a blob at the end. The blob data to emit is 414 /// specified by the pointer and length specified at the end. In contrast to 415 /// EmitRecord, this routine expects that the first entry in Vals is the code 416 /// of the record. 417 template<typename uintty> EmitRecordWithBlob(unsigned Abbrev,SmallVectorImpl<uintty> & Vals,StringRef Blob)418 void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 419 StringRef Blob) { 420 EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob); 421 } 422 template<typename uintty> EmitRecordWithBlob(unsigned Abbrev,SmallVectorImpl<uintty> & Vals,const char * BlobData,unsigned BlobLen)423 void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 424 const char *BlobData, unsigned BlobLen) { 425 return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen)); 426 } 427 428 /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records 429 /// that end with an array. 430 template<typename uintty> EmitRecordWithArray(unsigned Abbrev,SmallVectorImpl<uintty> & Vals,StringRef Array)431 void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 432 StringRef Array) { 433 EmitRecordWithAbbrevImpl(Abbrev, Vals, Array); 434 } 435 template<typename uintty> EmitRecordWithArray(unsigned Abbrev,SmallVectorImpl<uintty> & Vals,const char * ArrayData,unsigned ArrayLen)436 void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 437 const char *ArrayData, unsigned ArrayLen) { 438 return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, 439 ArrayLen)); 440 } 441 442 //===--------------------------------------------------------------------===// 443 // Abbrev Emission 444 //===--------------------------------------------------------------------===// 445 446 private: 447 // Emit the abbreviation as a DEFINE_ABBREV record. EncodeAbbrev(BitCodeAbbrev * Abbv)448 void EncodeAbbrev(BitCodeAbbrev *Abbv) { 449 EmitCode(bitc::DEFINE_ABBREV); 450 EmitVBR(Abbv->getNumOperandInfos(), 5); 451 for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); 452 i != e; ++i) { 453 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 454 Emit(Op.isLiteral(), 1); 455 if (Op.isLiteral()) { 456 EmitVBR64(Op.getLiteralValue(), 8); 457 } else { 458 Emit(Op.getEncoding(), 3); 459 if (Op.hasEncodingData()) 460 EmitVBR64(Op.getEncodingData(), 5); 461 } 462 } 463 } 464 public: 465 466 /// EmitAbbrev - This emits an abbreviation to the stream. Note that this 467 /// method takes ownership of the specified abbrev. EmitAbbrev(BitCodeAbbrev * Abbv)468 unsigned EmitAbbrev(BitCodeAbbrev *Abbv) { 469 // Emit the abbreviation as a record. 470 EncodeAbbrev(Abbv); 471 CurAbbrevs.push_back(Abbv); 472 return static_cast<unsigned>(CurAbbrevs.size())-1 + 473 bitc::FIRST_APPLICATION_ABBREV; 474 } 475 476 //===--------------------------------------------------------------------===// 477 // BlockInfo Block Emission 478 //===--------------------------------------------------------------------===// 479 480 /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK. EnterBlockInfoBlock(unsigned CodeWidth)481 void EnterBlockInfoBlock(unsigned CodeWidth) { 482 EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth); 483 BlockInfoCurBID = ~0U; 484 } 485 private: 486 /// SwitchToBlockID - If we aren't already talking about the specified block 487 /// ID, emit a BLOCKINFO_CODE_SETBID record. SwitchToBlockID(unsigned BlockID)488 void SwitchToBlockID(unsigned BlockID) { 489 if (BlockInfoCurBID == BlockID) return; 490 SmallVector<unsigned, 2> V; 491 V.push_back(BlockID); 492 EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V); 493 BlockInfoCurBID = BlockID; 494 } 495 getOrCreateBlockInfo(unsigned BlockID)496 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 497 if (BlockInfo *BI = getBlockInfo(BlockID)) 498 return *BI; 499 500 // Otherwise, add a new record. 501 BlockInfoRecords.emplace_back(); 502 BlockInfoRecords.back().BlockID = BlockID; 503 return BlockInfoRecords.back(); 504 } 505 506 public: 507 508 /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified 509 /// BlockID. EmitBlockInfoAbbrev(unsigned BlockID,BitCodeAbbrev * Abbv)510 unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) { 511 SwitchToBlockID(BlockID); 512 EncodeAbbrev(Abbv); 513 514 // Add the abbrev to the specified block record. 515 BlockInfo &Info = getOrCreateBlockInfo(BlockID); 516 Info.Abbrevs.push_back(Abbv); 517 518 return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV; 519 } 520 }; 521 522 523 } // End llvm namespace 524 525 #endif 526