From b35ca9db9ce25e6b61aa3eaee41464f647d34899 Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Sun, 22 Apr 2007 06:22:05 +0000
Subject: [PATCH] Define the content-independent interfaces to read/write
 bitcode files and the high-level interface to read/write LLVM IR bitcode
 files.

This is a work in progress.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@36329 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Bitcode/BitCodes.h        |  46 +++++
 include/llvm/Bitcode/BitstreamReader.h | 239 +++++++++++++++++++++++++
 include/llvm/Bitcode/BitstreamWriter.h | 238 ++++++++++++++++++++++++
 include/llvm/Bitcode/ReaderWriter.h    |  38 ++++
 4 files changed, 561 insertions(+)
 create mode 100644 include/llvm/Bitcode/BitCodes.h
 create mode 100644 include/llvm/Bitcode/BitstreamReader.h
 create mode 100644 include/llvm/Bitcode/BitstreamWriter.h
 create mode 100644 include/llvm/Bitcode/ReaderWriter.h

diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h
new file mode 100644
index 00000000000..a3b741b2e92
--- /dev/null
+++ b/include/llvm/Bitcode/BitCodes.h
@@ -0,0 +1,46 @@
+//===- BitCodes.h - Enum values for the bitcode format ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines Bitcode enum values.
+//
+// The enum values defined in this file should be considered permanent. If
+// new features are added, they should have values added at the end of the
+// respective lists.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_BITCODES_H
+#define LLVM_BITCODE_BITCODES_H
+
+namespace llvm {
+namespace bitc {
+  enum StandardWidths {
+    BlockIDWidth = 8,  // We use VBR-8 for block IDs.
+    CodeLenWidth = 4,  // Codelen are VBR-4.
+    BlockSizeWidth = 32  // BlockSize up to 2^32 32-bit words = 16GB per block.
+  };
+
+  // The standard code namespace always has a way to exit a block, enter a
+  // nested block, define abbrevs, and define an unabbreviated record.
+  enum FixedCodes {
+    END_BLOCK = 0,  // Must be zero to guarantee termination for broken bitcode.
+    ENTER_SUBBLOCK = 1,
+
+    // Two codes are reserved for defining abbrevs and for emitting an
+    // unabbreviated record.
+    DEFINE_ABBREVS = 2,
+    UNABBREV_RECORD = 3,
+
+    // This is not a code, this is a marker for the first abbrev assignment.
+    FIRST_ABBREV = 4
+  };
+} // End bitc namespace
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
new file mode 100644
index 00000000000..ceae5afbe98
--- /dev/null
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -0,0 +1,239 @@
+//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitstreamReader class. This class can be used to
+// read an arbitrary bitstream, regardless of its contents.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITSTREAM_READER_H
+#define BITSTREAM_READER_H
+
+#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/ADT/SmallVector.h"
+#include <cassert>
+
+namespace llvm {
+
+class BitstreamReader {
+  const unsigned char *NextChar;
+  const unsigned char *LastChar;
+
+  /// CurWord - This is the current data we have pulled from the stream but have
+  /// not returned to the client.
+  uint32_t CurWord;
+
+  /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
+  /// is always from [0...31] inclusive.
+  unsigned BitsInCurWord;
+
+  // CurCodeSize - This is the declared size of code values used for the current
+  // block, in bits.
+  unsigned CurCodeSize;
+
+  /// BlockScope - This tracks the codesize of parent blocks.
+  SmallVector<unsigned, 8> BlockScope;
+
+public:
+  /// BitstreamReader - Read the bitstream stored in [Start, End). The length
+  /// of the range must be a multiple of 4 bytes.
+  BitstreamReader(const unsigned char *Start, const unsigned char *End)
+    : NextChar(Start), LastChar(End) {
+    assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
+    CurWord = 0;
+    BitsInCurWord = 0;
+    CurCodeSize = 2;
+  }
+
+  /// AtEndOfStream - Return true when every word of the input buffer has been
+  /// loaded. Bits of the final word may still be buffered in CurWord.
+  bool AtEndOfStream() const { return NextChar == LastChar; }
+
+  /// Read - Read and return the next NumBits bits (at most 32) of the stream,
+  /// least-significant bit first. Words are stored little-endian. Reading past
+  /// the end of the stream returns zero.
+  uint32_t Read(unsigned NumBits) {
+    // If the field is fully contained by CurWord, return it quickly.
+    if (BitsInCurWord >= NumBits) {
+      uint32_t R = CurWord & ((1U << NumBits)-1);
+      CurWord >>= NumBits;
+      BitsInCurWord -= NumBits;
+      return R;
+    }
+
+    // If we run out of data, stop at the end of the stream.
+    if (LastChar == NextChar) {
+      CurWord = 0;
+      BitsInCurWord = 0;
+      return 0;
+    }
+
+    unsigned R = CurWord;
+
+    // Read the next word from the stream.
+    CurWord = (NextChar[0] << 0) | (NextChar[1] << 8) |
+              (NextChar[2] << 16) | ((uint32_t)NextChar[3] << 24);
+    NextChar += 4;
+
+    // Extract NumBits-BitsInCurWord from what we just read.
+    unsigned BitsLeft = NumBits-BitsInCurWord;
+
+    // Be careful here, BitsLeft is in the range [1..32] inclusive.
+    R |= (CurWord & (~0U >> (32-BitsLeft))) << BitsInCurWord;
+
+    // BitsLeft bits have just been used up from CurWord.
+    if (BitsLeft != 32)
+      CurWord >>= BitsLeft;
+    else
+      CurWord = 0;
+    BitsInCurWord = 32-BitsLeft;
+    return R;
+  }
+
+  /// ReadVBR - Read a variable bit rate encoded value. Each NumBits-wide chunk
+  /// holds NumBits-1 payload bits (low chunks first); the top bit of a chunk
+  /// is set when another chunk follows.
+  uint32_t ReadVBR(unsigned NumBits) {
+    uint32_t Piece = Read(NumBits);
+    if ((Piece & (1U << (NumBits-1))) == 0)
+      return Piece;
+
+    uint32_t Result = 0;
+    unsigned NextBit = 0;
+    while (1) {
+      Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
+
+      if ((Piece & (1U << (NumBits-1))) == 0)
+        return Result;
+
+      NextBit += NumBits-1;
+      Piece = Read(NumBits);
+    }
+  }
+
+  /// ReadVBR64 - Same as ReadVBR, but accumulates the result in 64 bits.
+  uint64_t ReadVBR64(unsigned NumBits) {
+    uint64_t Piece = Read(NumBits);
+    if ((Piece & (1U << (NumBits-1))) == 0)
+      return Piece;
+
+    uint64_t Result = 0;
+    unsigned NextBit = 0;
+    while (1) {
+      Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
+
+      if ((Piece & (1U << (NumBits-1))) == 0)
+        return Result;
+
+      NextBit += NumBits-1;
+      Piece = Read(NumBits);
+    }
+  }
+
+  /// SkipToWord - Discard any bits still buffered from the current word, so
+  /// the next read starts at a 32-bit word boundary.
+  void SkipToWord() {
+    BitsInCurWord = 0;
+    CurWord = 0;
+  }
+
+
+  /// ReadCode - Read a code using the current block's declared code size.
+  unsigned ReadCode() {
+    return Read(CurCodeSize);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Block Manipulation
+  //===--------------------------------------------------------------------===//
+
+  // Block header:
+  //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
+
+  /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
+  /// the block.
+  unsigned ReadSubBlockID() {
+    return ReadVBR(bitc::BlockIDWidth);
+  }
+
+  /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
+  /// over the body of this block. If the block record is malformed, return
+  /// true.
+  bool SkipBlock() {
+    // Read and ignore the codelen value. Since we are skipping this block, we
+    // don't care what code widths are used inside of it.
+    ReadVBR(bitc::CodeLenWidth);
+    SkipToWord();
+    unsigned NumWords = Read(bitc::BlockSizeWidth);
+
+    // Check that the block wasn't partially defined, and that the offset isn't
+    // bogus. Compare in words so that a huge NumWords cannot wrap around.
+    if (AtEndOfStream() || (LastChar-NextChar)/4 < NumWords)
+      return true;
+
+    NextChar += NumWords*4;
+    return false;
+  }
+
+  /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID,
+  /// enter the block. If the block header is malformed, return true.
+  bool EnterSubBlock() {
+    BlockScope.push_back(CurCodeSize);
+
+    // Get the codesize of this block.
+    CurCodeSize = ReadVBR(bitc::CodeLenWidth);
+    SkipToWord();
+    unsigned NumWords = Read(bitc::BlockSizeWidth);
+
+    // Validate that this block is sane (compared in words to avoid overflow).
+    if (CurCodeSize == 0 || AtEndOfStream() || (LastChar-NextChar)/4 < NumWords)
+      return true;
+
+    return false;
+  }
+
+  /// ReadBlockEnd - Having read the END_BLOCK code, realign to a word boundary
+  /// and restore the code size of the parent block. Return true if there is no
+  /// enclosing block to exit.
+  bool ReadBlockEnd() {
+    if (BlockScope.empty()) return true;
+
+    // Block tail:
+    //    [END_BLOCK, <align4bytes>]
+    SkipToWord();
+    CurCodeSize = BlockScope.back();
+    BlockScope.pop_back();
+    return false;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Record Processing
+  //===--------------------------------------------------------------------===//
+
+  /// ReadRecord - Having read AbbrevID, read the rest of the record, appending
+  /// its operands to Vals and returning the record code. Only unabbreviated
+  /// records are supported so far.
+  unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals) {
+    if (AbbrevID == bitc::UNABBREV_RECORD) {
+      unsigned Code = ReadVBR(6);
+      unsigned NumElts = ReadVBR(6);
+      for (unsigned i = 0; i != NumElts; ++i)
+        Vals.push_back(ReadVBR64(6));
+      return Code;
+    }
+
+    assert(0 && "Reading with abbrevs not implemented!");
+    return 0;  // Not reached in builds with assertions enabled.
+  }
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
new file mode 100644
index 00000000000..4aef53bf7c3
--- /dev/null
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -0,0 +1,238 @@
+//===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitstreamWriter class. This class can be used to
+// write an arbitrary bitstream, regardless of its contents.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITSTREAM_WRITER_H
+#define BITSTREAM_WRITER_H
+
+#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/ADT/SmallVector.h"
+#include <cassert>
+#include <vector>
+
+namespace llvm {
+
+class BitstreamWriter {
+  std::vector<unsigned char> &Out;
+
+  /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
+  unsigned CurBit;
+
+  /// CurValue - The current value. Only bits < CurBit are valid.
+  uint32_t CurValue;
+
+  // CurCodeSize - This is the declared size of code values used for the current
+  // block, in bits.
+  unsigned CurCodeSize;
+
+  /// Block - The state saved when a sub-block is entered: the code size of the
+  /// enclosing block and the word index of the block length placeholder.
+  struct Block {
+    unsigned PrevCodeSize;
+    unsigned StartSizeWord;
+    Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
+  };
+
+  /// BlockScope - This tracks the current blocks that we have entered.
+  std::vector<Block> BlockScope;
+public:
+  /// BitstreamWriter - Emit the bitstream by appending bytes to O, which must
+  /// outlive this writer.
+  BitstreamWriter(std::vector<unsigned char> &O)
+    : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
+
+  ~BitstreamWriter() {
+    assert(CurBit == 0 && "Unflused data remaining");
+    assert(BlockScope.empty() && "Block imbalance");
+  }
+  //===--------------------------------------------------------------------===//
+  // Basic Primitives for emitting bits to the stream.
+  //===--------------------------------------------------------------------===//
+
+  /// Emit - Append the low NumBits bits (at most 32) of Val to the stream. All
+  /// higher bits of Val must be zero. Full 32-bit words are written to the
+  /// output buffer in little-endian byte order.
+  void Emit(uint32_t Val, unsigned NumBits) {
+    assert(NumBits <= 32 && "Invalid value size!");
+    assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
+    CurValue |= Val << CurBit;
+    if (CurBit + NumBits < 32) {
+      CurBit += NumBits;
+      return;
+    }
+
+    // Add the current word.
+    unsigned V = CurValue;
+    Out.push_back((unsigned char)(V >> 0));
+    Out.push_back((unsigned char)(V >> 8));
+    Out.push_back((unsigned char)(V >> 16));
+    Out.push_back((unsigned char)(V >> 24));
+
+    if (CurBit)
+      CurValue = Val >> (32-CurBit);
+    else
+      CurValue = 0;
+    CurBit = (CurBit+NumBits) & 31;
+  }
+
+  /// Emit64 - Append the low NumBits bits (at most 64) of Val to the stream,
+  /// low 32 bits first.
+  void Emit64(uint64_t Val, unsigned NumBits) {
+    if (NumBits <= 32)
+      Emit((uint32_t)Val, NumBits);
+    else {
+      Emit((uint32_t)Val, 32);
+      Emit((uint32_t)(Val >> 32), NumBits-32);
+    }
+  }
+
+  /// FlushToWord - If a partial word is pending, pad it with zero bits and
+  /// write it out so that the stream is aligned to a 32-bit boundary.
+  void FlushToWord() {
+    if (CurBit) {
+      unsigned V = CurValue;
+      Out.push_back((unsigned char)(V >> 0));
+      Out.push_back((unsigned char)(V >> 8));
+      Out.push_back((unsigned char)(V >> 16));
+      Out.push_back((unsigned char)(V >> 24));
+      CurBit = 0;
+      CurValue = 0;
+    }
+  }
+
+  /// EmitVBR - Emit Val as a variable bit rate value: a sequence of
+  /// NumBits-wide chunks, each holding NumBits-1 payload bits (low bits first)
+  /// plus a top bit that is set when another chunk follows.
+  void EmitVBR(uint32_t Val, unsigned NumBits) {
+    uint32_t Threshold = 1U << (NumBits-1);
+
+    // Emit the bits with VBR encoding, NumBits-1 bits at a time.
+    while (Val >= Threshold) {
+      Emit((Val & (Threshold-1)) | Threshold, NumBits);
+      Val >>= NumBits-1;
+    }
+
+    Emit(Val, NumBits);
+  }
+
+  /// EmitVBR64 - Same as EmitVBR, but accepts a 64-bit value.
+  void EmitVBR64(uint64_t Val, unsigned NumBits) {
+    if ((uint32_t)Val == Val)
+      return EmitVBR((uint32_t)Val, NumBits);
+
+    uint64_t Threshold = 1U << (NumBits-1);
+
+    // Emit the bits with VBR encoding, NumBits-1 bits at a time.
+    while (Val >= Threshold) {
+      Emit(((uint32_t)Val & ((1U << (NumBits-1))-1)) |
+           (1U << (NumBits-1)), NumBits);
+      Val >>= NumBits-1;
+    }
+
+    Emit((uint32_t)Val, NumBits);
+  }
+
+  /// EmitCode - Emit the specified code.
+  void EmitCode(unsigned Val) {
+    Emit(Val, CurCodeSize);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Block Manipulation
+  //===--------------------------------------------------------------------===//
+
+  /// EnterSubblock - Emit the header of a nested block with the given BlockID
+  /// and start using CodeLen-bit codes. Must be balanced by ExitBlock.
+  void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
+    // Block header:
+    //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
+    EmitCode(bitc::ENTER_SUBBLOCK);
+    EmitVBR(BlockID, bitc::BlockIDWidth);
+    EmitVBR(CodeLen, bitc::CodeLenWidth);
+    FlushToWord();
+    BlockScope.push_back(Block(CurCodeSize, Out.size()/4));
+    // Emit a placeholder, which will be replaced when the block is popped.
+    Emit(0, bitc::BlockSizeWidth);
+
+    CurCodeSize = CodeLen;
+  }
+
+  /// ExitBlock - Emit the END_BLOCK marker for the innermost open block and
+  /// backpatch the block length field that EnterSubblock reserved.
+  void ExitBlock() {
+    assert(!BlockScope.empty() && "Block scope imbalance!");
+    Block B = BlockScope.back();
+    BlockScope.pop_back();
+
+    // Block tail:
+    //    [END_BLOCK, <align4bytes>]
+    EmitCode(bitc::END_BLOCK);
+    FlushToWord();
+
+    // Compute the size of the block, in words, not counting the size field.
+    unsigned SizeInWords = Out.size()/4-B.StartSizeWord - 1;
+    unsigned ByteNo = B.StartSizeWord*4;
+
+    // Update the block size field in the header of this sub-block.
+    Out[ByteNo++] = (unsigned char)(SizeInWords >> 0);
+    Out[ByteNo++] = (unsigned char)(SizeInWords >> 8);
+    Out[ByteNo++] = (unsigned char)(SizeInWords >> 16);
+    Out[ByteNo++] = (unsigned char)(SizeInWords >> 24);
+
+    // Restore the outer block's code size.
+    CurCodeSize = B.PrevCodeSize;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Record Emission
+  //===--------------------------------------------------------------------===//
+
+  /// EmitRecord - Emit the specified record to the stream, using an abbrev if
+  /// we have one to compress the output.
+  void EmitRecord(unsigned Code, SmallVectorImpl<uint64_t> &Vals,
+                  unsigned Abbrev = 0) {
+    if (Abbrev) {
+      assert(0 && "abbrevs not implemented yet!");
+    } else {
+      // If we don't have an abbrev to use, emit this in its fully unabbreviated
+      // form.
+      EmitCode(bitc::UNABBREV_RECORD);
+      EmitVBR(Code, 6);
+      EmitVBR(Vals.size(), 6);
+      for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+        EmitVBR64(Vals[i], 6);
+    }
+  }
+
+  /// EmitRecord - Emit the specified record to the stream, using an abbrev if
+  /// we have one to compress the output.
+  void EmitRecord(unsigned Code, SmallVectorImpl<unsigned> &Vals,
+                  unsigned Abbrev = 0) {
+    if (Abbrev) {
+      assert(0 && "abbrevs not implemented yet!");
+    } else {
+      // If we don't have an abbrev to use, emit this in its fully unabbreviated
+      // form.
+      EmitCode(bitc::UNABBREV_RECORD);
+      EmitVBR(Code, 6);
+      EmitVBR(Vals.size(), 6);
+      for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+        EmitVBR(Vals[i], 6);
+    }
+  }
+};
+
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h
new file mode 100644
index 00000000000..0522f1bb28e
--- /dev/null
+++ b/include/llvm/Bitcode/ReaderWriter.h
@@ -0,0 +1,38 @@
+//===-- llvm/Bitcode/ReaderWriter.h - Bitcode reader/writers ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines interfaces to read and write LLVM bitcode files/streams.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_H
+#define LLVM_BITCODE_H
+
+#include <iosfwd>
+#include <string>
+
+namespace llvm {
+  class Module;
+  class ModuleProvider;
+
+  ModuleProvider *getBitcodeModuleProvider(const std::string &Filename,
+                                           std::string *ErrMsg = 0);
+
+
+  /// ParseBitcodeFile - Read the specified bitcode file, returning the module.
+  /// If an error occurs, return null and fill in *ErrMsg if non-null.
+  Module *ParseBitcodeFile(const std::string &Filename,
+                           std::string *ErrMsg = 0);
+
+  /// WriteBitcodeToFile - Write the specified module to the specified output
+  /// stream.
+  void WriteBitcodeToFile(const Module *M, std::ostream &Out);
+} // End llvm namespace
+
+#endif