From b35ca9db9ce25e6b61aa3eaee41464f647d34899 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Sun, 22 Apr 2007 06:22:05 +0000
Subject: [PATCH] Define the content-independent interfaces to read/write
 bitcode files and the high-level interface to read/write LLVM IR bitcode
 files.

This is a work in progress.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@36329 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Bitcode/BitCodes.h        |  46 ++++++
 include/llvm/Bitcode/BitstreamReader.h | 220 +++++++++++++++++++++++++
 include/llvm/Bitcode/BitstreamWriter.h | 219 ++++++++++++++++++++++++
 include/llvm/Bitcode/ReaderWriter.h    |  38 +++++
 4 files changed, 523 insertions(+)
 create mode 100644 include/llvm/Bitcode/BitCodes.h
 create mode 100644 include/llvm/Bitcode/BitstreamReader.h
 create mode 100644 include/llvm/Bitcode/BitstreamWriter.h
 create mode 100644 include/llvm/Bitcode/ReaderWriter.h
diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h
new file mode 100644
index 00000000000..a3b741b2e92
--- /dev/null
+++ b/include/llvm/Bitcode/BitCodes.h
@@ -0,0 +1,46 @@
+//===- BitCodes.h - Enum values for the bitcode format ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines Bitcode enum values.
+//
+// The enum values defined in this file should be considered permanent.  If
+// new features are added, they should have values added at the end of the
+// respective lists.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_BITCODES_H
+#define LLVM_BITCODE_BITCODES_H
+
+namespace llvm {
+namespace bitc {
+  enum StandardWidths {
+    BlockIDWidth = 8,  // We use VBR-8 for block IDs.
+    CodeLenWidth = 4,  // Code lengths are VBR-4.
+    BlockSizeWidth = 32  // BlockSize up to 2^32 32-bit words = 16GB per block.
+  };
+  
+  // The standard code namespace always has a way to exit a block, enter a
+  // nested block, define abbrevs, and define an unabbreviated record.
+  enum FixedCodes {
+    END_BLOCK = 0,  // Must be zero to guarantee termination for broken bitcode.
+    ENTER_SUBBLOCK = 1,  // Introduces a nested block.
+    
+    // Two codes are reserved for defining abbrevs and for emitting an
+    // unabbreviated record.
+    DEFINE_ABBREVS = 2,
+    UNABBREV_RECORD = 3,
+    
+    // This is not a code, this is a marker for the first abbrev assignment.
+    FIRST_ABBREV = 4
+  };
+} // End bitc namespace
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
new file mode 100644
index 00000000000..ceae5afbe98
--- /dev/null
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -0,0 +1,220 @@
+//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitstreamReader class.  This class can be used to
+// read an arbitrary bitstream, regardless of its contents.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITSTREAM_READER_H
+#define BITSTREAM_READER_H
+
+#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/ADT/SmallVector.h"
+#include <cassert>
+
+namespace llvm {
+  
+class BitstreamReader {
+  const unsigned char *NextChar;  // Next byte of the buffer not yet loaded.
+  const unsigned char *LastChar;  // End of buffer; NextChar stops here.
+  
+  /// CurWord - This is the current data we have pulled from the stream but have
+  /// not returned to the client.
+  uint32_t CurWord;
+  
+  /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
+  /// is always from [0...31] inclusive.
+  unsigned BitsInCurWord;
+  
+  /// CurCodeSize - This is the declared size of code values used for the
+  /// current block, in bits.
+  unsigned CurCodeSize;
+  
+  /// BlockScope - This tracks the codesize of parent blocks.
+  SmallVector<unsigned, 8> BlockScope;
+  
+public:
+  BitstreamReader(const unsigned char *Start, const unsigned char *End)
+    : NextChar(Start), LastChar(End),
+      CurWord(0), BitsInCurWord(0), CurCodeSize(2) {
+    assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
+  }
+  
+  /// AtEndOfStream - Return true once the last word of the buffer has been
+  /// loaded.  Bits already pulled into CurWord may still be unread.
+  bool AtEndOfStream() const { return LastChar == NextChar; }
+  
+  uint32_t Read(unsigned NumBits) {
+    // Read - Return the next NumBits (at most 32) bits of the stream.
+    assert(NumBits <= 32 && "Cannot read more than 32 bits at a time!");
+    // If the field is fully contained by CurWord, return it quickly.
+    if (BitsInCurWord >= NumBits) {
+      uint32_t R = CurWord & ((1U << NumBits)-1);
+      CurWord >>= NumBits;
+      BitsInCurWord -= NumBits;
+      return R;
+    }
+
+    // If we run out of data, stop at the end of the stream.
+    if (LastChar == NextChar) {
+      CurWord = 0;
+      BitsInCurWord = 0;
+      return 0;
+    }
+    
+    uint32_t R = CurWord;  // Start with the bits that were still buffered.
+    // Read the next little-endian word from the stream.  Each byte is widened
+    // first so that no shift is performed on a promoted (signed) int.
+    CurWord = ((uint32_t)NextChar[0] <<  0) | ((uint32_t)NextChar[1] <<  8) |
+              ((uint32_t)NextChar[2] << 16) | ((uint32_t)NextChar[3] << 24);
+    NextChar += 4;
+    
+    // Extract NumBits-BitsInCurWord from what we just read.
+    unsigned BitsLeft = NumBits-BitsInCurWord;
+    // Be careful here, BitsLeft is in the range [1..32] inclusive.
+    R |= (CurWord & (~0U >> (32-BitsLeft))) << BitsInCurWord;
+    
+    // BitsLeft bits have just been used up from CurWord.
+    if (BitsLeft != 32)
+      CurWord >>= BitsLeft;
+    else
+      CurWord = 0;
+    BitsInCurWord = 32-BitsLeft;
+    return R;
+  }
+  
+  /// ReadVBR - Read a VBR-encoded value made of NumBits-wide chunks.  The top
+  /// bit of each chunk says whether another chunk follows; the other NumBits-1
+  /// bits carry the payload, least significant chunk first.
+  uint32_t ReadVBR(unsigned NumBits) {
+    const uint32_t MoreBit = 1U << (NumBits-1);
+    const uint32_t DataMask = MoreBit-1;
+
+    uint32_t Value = 0;
+    for (unsigned Shift = 0; ; Shift += NumBits-1) {
+      const uint32_t Chunk = Read(NumBits);
+      Value |= (Chunk & DataMask) << Shift;
+      
+      // A clear top bit marks the final chunk.
+      if ((Chunk & MoreBit) == 0)
+        return Value;
+    }
+  }
+  
+  /// ReadVBR64 - Read a VBR-encoded value made of NumBits-wide chunks into a
+  /// 64-bit result.  The top bit of each chunk says whether another chunk
+  /// follows; the other NumBits-1 bits are payload, low chunk first.
+  uint64_t ReadVBR64(unsigned NumBits) {
+    const uint32_t MoreBit = 1U << (NumBits-1);
+    const uint64_t DataMask = MoreBit-1;
+    
+    uint64_t Value = 0;
+    for (unsigned Shift = 0; ; Shift += NumBits-1) {
+      const uint64_t Chunk = Read(NumBits);
+      Value |= (Chunk & DataMask) << Shift;
+      
+      // A clear top bit marks the final chunk.
+      if ((Chunk & MoreBit) == 0)
+        return Value;
+    }
+  }
+
+  /// SkipToWord - Discard any bits still buffered in CurWord, so the next Read
+  /// starts at the following 32-bit word of the stream.
+  void SkipToWord() {
+    CurWord = 0;
+    BitsInCurWord = 0;
+  }
+
+  /// ReadCode - Read one value of the current block's code width.
+  unsigned ReadCode() { return Read(CurCodeSize); }
+
+  //===--------------------------------------------------------------------===//
+  // Block Manipulation
+  //===--------------------------------------------------------------------===//
+  
+  // Block header:
+  //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
+
+  /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
+  /// the block.  The ID is VBR encoded, bitc::BlockIDWidth bits per chunk.
+  unsigned ReadSubBlockID() {
+    return ReadVBR(bitc::BlockIDWidth);
+  }
+  
+  /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
+  /// over the body of this block.  If the block record is malformed, return
+  /// true.
+  bool SkipBlock() {
+    // Read and ignore the codelen value.  Since we are skipping this block, we
+    // don't care what code widths are used inside of it.
+    ReadVBR(bitc::CodeLenWidth);
+    SkipToWord();
+    unsigned NumWords = Read(bitc::BlockSizeWidth);
+    // Check that the block wasn't partially defined, and that the offset isn't
+    // bogus.  Compare byte counts in 64 bits: NumWords comes from the file, so
+    // NumWords*4 (or NextChar plus it) could otherwise wrap and pass the test.
+    uint64_t NumBytes = (uint64_t)NumWords*4;
+    if (AtEndOfStream() || NumBytes > (uint64_t)(LastChar-NextChar))
+      return true;
+    NextChar += NumBytes;
+    return false;
+  }
+  
+  /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID,
+  /// enter the block: save the current code size, then read the block's own
+  /// code size and length.  Returns true if the block header is malformed.
+  bool EnterSubBlock() {
+    BlockScope.push_back(CurCodeSize);
+    
+    // Get the codesize of this block.
+    CurCodeSize = ReadVBR(bitc::CodeLenWidth);
+    SkipToWord();
+    unsigned NumWords = Read(bitc::BlockSizeWidth);
+    
+    // Validate that this block is sane.  The length is compared as a 64-bit
+    // byte count so that a huge NumWords cannot wrap around and look valid.
+    return CurCodeSize == 0 || AtEndOfStream() ||
+           (uint64_t)NumWords*4 > (uint64_t)(LastChar-NextChar);
+  }
+  
+  /// ReadBlockEnd - Process a block tail, [END_BLOCK, <align4bytes>].  Returns
+  /// true if no block is open; otherwise pops back to the parent's code size.
+  bool ReadBlockEnd() {
+    if (BlockScope.empty())
+      return true;
+    SkipToWord();
+    CurCodeSize = BlockScope.back();
+    BlockScope.pop_back();
+    return false;
+  }
+  
+  //===--------------------------------------------------------------------===//
+  // Record Processing
+  //===--------------------------------------------------------------------===//
+  
+  /// ReadRecord - Read an unabbreviated record into Vals; return its code.
+  unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals) {
+    if (AbbrevID != bitc::UNABBREV_RECORD) {
+      assert(0 && "Reading with abbrevs not implemented!");
+      return 0;  // Don't fall off the end of the function in NDEBUG builds.
+    }
+    unsigned Code = ReadVBR(6);  // Layout: [code, numops, op0, ...], VBR-6.
+    for (unsigned i = 0, e = ReadVBR(6); i != e; ++i)
+      Vals.push_back(ReadVBR64(6));
+    return Code;
+  }
+  
+};
+
+} // End llvm namespace
+
+#endif
+
+    
\ No newline at end of file
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
new file mode 100644
index 00000000000..4aef53bf7c3
--- /dev/null
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -0,0 +1,219 @@
+//===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitstreamWriter class.  This class can be used to
+// write an arbitrary bitstream, regardless of its contents.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITSTREAM_WRITER_H
+#define BITSTREAM_WRITER_H
+
+#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/ADT/SmallVector.h"
+#include <cassert>
+#include <vector>
+
+namespace llvm {
+
+class BitstreamWriter {
+  std::vector<unsigned char> &Out;  // Emitted bytes are appended here.
+
+  /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
+  unsigned CurBit;
+  
+  /// CurValue - The current value.  Only bits < CurBit are valid.
+  uint32_t CurValue;
+  
+  /// CurCodeSize - This is the declared size of code values used for the
+  /// current block, in bits.
+  unsigned CurCodeSize;
+  
+  struct Block {
+    unsigned PrevCodeSize;   // Code size of the enclosing block.
+    unsigned StartSizeWord;  // Word index in Out of the block's size field.
+    Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
+  };
+  
+  /// BlockScope - This tracks the current blocks that we have entered.
+  std::vector<Block> BlockScope;
+public:
+  BitstreamWriter(std::vector<unsigned char> &O) 
+    : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
+
+  ~BitstreamWriter() {
+    assert(CurBit == 0 && "Unflushed data remaining");  // No partial word.
+    assert(BlockScope.empty() && "Block imbalance");    // All blocks exited.
+  }
+  //===--------------------------------------------------------------------===//
+  // Basic Primitives for emitting bits to the stream.
+  //===--------------------------------------------------------------------===//
+  
+  /// Emit - Append the low NumBits (at most 32) bits of Val to the stream.  Val
+  /// must not have any bits set at or above bit NumBits.
+  void Emit(uint32_t Val, unsigned NumBits) {
+    assert(NumBits <= 32 && "Invalid value size!");
+    // Written so that neither NumBits == 0 nor NumBits == 32 shifts by 32.
+    assert((NumBits == 32 || (Val >> NumBits) == 0) && "High bits set!");
+    CurValue |= Val << CurBit;
+    if (CurBit + NumBits < 32) {
+      CurBit += NumBits;
+      return;
+    }
+    
+    // The accumulator is full: add the current word, low byte first.
+    unsigned V = CurValue;
+    Out.push_back((unsigned char)(V >>  0));
+    Out.push_back((unsigned char)(V >>  8));
+    Out.push_back((unsigned char)(V >> 16));
+    Out.push_back((unsigned char)(V >> 24));
+    // Keep the bits of Val that did not fit; there are none when CurBit is 0.
+    CurValue = CurBit ? Val >> (32-CurBit) : 0;
+    CurBit = (CurBit+NumBits) & 31;
+  }
+  
+  /// Emit64 - Emit the low NumBits bits of a 64-bit value.
+  void Emit64(uint64_t Val, unsigned NumBits) {
+    // Fields wider than 32 bits go out in two pieces, low half first.
+    const bool Wide = NumBits > 32;
+    Emit((uint32_t)Val, Wide ? 32 : NumBits);
+    if (Wide)
+      Emit((uint32_t)(Val >> 32), NumBits-32);
+  }
+  
+  void FlushToWord() {
+    // Write out a partially filled word, if any; no-op when already aligned.
+    if (CurBit == 0) return;
+    const unsigned Word = CurValue;
+    Out.push_back((unsigned char)(Word >>  0));
+    Out.push_back((unsigned char)(Word >>  8));
+    Out.push_back((unsigned char)(Word >> 16));
+    Out.push_back((unsigned char)(Word >> 24));
+    CurValue = 0;
+    CurBit = 0;
+  }
+  
+  /// EmitVBR - Emit Val as a sequence of NumBits-wide chunks.  Each chunk holds
+  /// NumBits-1 payload bits; its top bit is set when another chunk follows.
+  void EmitVBR(uint32_t Val, unsigned NumBits) {
+    const uint32_t MoreBit = 1U << (NumBits-1);
+    const uint32_t DataMask = MoreBit-1;
+    for (; Val >= MoreBit; Val >>= NumBits-1)
+      Emit((Val & DataMask) | MoreBit, NumBits);
+    
+    // What is left fits in a single chunk with the top bit clear.
+    Emit(Val, NumBits);
+  }
+  
+  /// EmitVBR64 - Emit a 64-bit Val as NumBits-wide chunks of NumBits-1 payload
+  /// bits each, with the top bit of a chunk flagging that another one follows.
+  void EmitVBR64(uint64_t Val, unsigned NumBits) {
+    // Anything that fits in 32 bits is handed to the 32-bit emitter.
+    if ((uint32_t)Val == Val) {
+      EmitVBR((uint32_t)Val, NumBits);
+      return;
+    }
+    
+    const uint32_t MoreBit = 1U << (NumBits-1);
+    const uint32_t DataMask = MoreBit-1;
+    for (; Val >= MoreBit; Val >>= NumBits-1)
+      Emit(((uint32_t)Val & DataMask) | MoreBit, NumBits);
+    Emit((uint32_t)Val, NumBits);
+  }
+  
+  /// EmitCode - Emit the specified code.  It is written with the code width
+  /// declared for the current block (CurCodeSize bits), so Val must fit in
+  /// that many bits.
+  void EmitCode(unsigned Val) { Emit(Val, CurCodeSize); }
+  
+  //===--------------------------------------------------------------------===//
+  // Block Manipulation
+  //===--------------------------------------------------------------------===//
+  
+  /// EnterSubblock - Open block BlockID, whose codes are CodeLen bits wide:
+  ///    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
+  void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
+    EmitCode(bitc::ENTER_SUBBLOCK);
+    EmitVBR(BlockID, bitc::BlockIDWidth);
+    EmitVBR(CodeLen, bitc::CodeLenWidth);
+    FlushToWord();
+    // Record where the length word is; ExitBlock patches the placeholder.
+    const unsigned SizeWordIdx = Out.size()/4;
+    BlockScope.push_back(Block(CurCodeSize, SizeWordIdx));
+    Emit(0, bitc::BlockSizeWidth);
+    CurCodeSize = CodeLen;
+  }
+  
+  /// ExitBlock - Close the innermost open block, emitting its tail,
+  ///    [END_BLOCK, <align4bytes>]
+  /// and back-patching the length word reserved by EnterSubblock.
+  void ExitBlock() {
+    assert(!BlockScope.empty() && "Block scope imbalance!");
+    const Block Cur = BlockScope.back();
+    BlockScope.pop_back();
+    
+    EmitCode(bitc::END_BLOCK);
+    FlushToWord();
+
+    // The length is measured in words and excludes the length word itself.
+    const unsigned NumWords = Out.size()/4 - Cur.StartSizeWord - 1;
+    
+    // Store it little-endian over the placeholder in the block header.
+    unsigned char *SizeField = &Out[Cur.StartSizeWord*4];
+    SizeField[0] = (unsigned char)(NumWords >>  0);
+    SizeField[1] = (unsigned char)(NumWords >>  8);
+    SizeField[2] = (unsigned char)(NumWords >> 16);
+    SizeField[3] = (unsigned char)(NumWords >> 24);
+    // Restore the outer block's code size.
+    CurCodeSize = Cur.PrevCodeSize;
+  }
+  
+  //===--------------------------------------------------------------------===//
+  // Record Emission
+  //===--------------------------------------------------------------------===//
+  
+  /// EmitRecord - Emit the specified record to the stream.  Abbreviations are
+  /// not implemented yet, so Abbrev must be 0 and the record is written as
+  ///    [UNABBREV_RECORD, code, numops, op0, op1, ...]  (VBR-6 fields).
+  void EmitRecord(unsigned Code, SmallVectorImpl<uint64_t> &Vals,
+                  unsigned Abbrev = 0) {
+    if (Abbrev) {
+      assert(0 && "abbrevs not implemented yet!");
+      return;
+    }
+    EmitCode(bitc::UNABBREV_RECORD);
+    EmitVBR(Code, 6);
+    const unsigned NumOps = Vals.size();
+    EmitVBR(NumOps, 6);
+    for (unsigned Idx = 0; Idx != NumOps; ++Idx)
+      EmitVBR64(Vals[Idx], 6);
+  }
+  
+  /// EmitRecord - Emit the specified record, whose operands are 32-bit values.
+  /// Abbreviations are not implemented yet, so Abbrev must be 0 and the record
+  /// is written as [UNABBREV_RECORD, code, numops, op0, op1, ...] in VBR-6.
+  void EmitRecord(unsigned Code, SmallVectorImpl<unsigned> &Vals,
+                  unsigned Abbrev = 0) {
+    if (Abbrev) {
+      assert(0 && "abbrevs not implemented yet!");
+      return;
+    }
+    EmitCode(bitc::UNABBREV_RECORD);
+    EmitVBR(Code, 6);
+    const unsigned NumOps = Vals.size();
+    EmitVBR(NumOps, 6);
+    for (unsigned Idx = 0; Idx != NumOps; ++Idx)
+      EmitVBR(Vals[Idx], 6);
+  }
+};
+
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h
new file mode 100644
index 00000000000..0522f1bb28e
--- /dev/null
+++ b/include/llvm/Bitcode/ReaderWriter.h
@@ -0,0 +1,38 @@
+//===-- llvm/Bitcode/ReaderWriter.h - Bitcode reader/writers ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines interfaces to read and write LLVM bitcode files/streams.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_H
+#define LLVM_BITCODE_H
+
+#include <iosfwd>
+#include <string>
+
+namespace llvm {
+  class Module;
+  class ModuleProvider;
+  
+  /// NOTE(review): undocumented; presumably opens Filename as bitcode - confirm.
+  ModuleProvider *getBitcodeModuleProvider(const std::string &Filename,
+                                           std::string *ErrMsg = 0);
+  
+  /// ParseBitcodeFile - Read the specified bitcode file, returning the module.
+  /// If an error occurs, return null and fill in *ErrMsg if non-null.
+  Module *ParseBitcodeFile(const std::string &Filename,
+                           std::string *ErrMsg = 0);
+  
+  /// WriteBitcodeToFile - Write the specified module to the specified output
+  /// stream.
+  void WriteBitcodeToFile(const Module *M, std::ostream &Out);
+} // End llvm namespace
+
+#endif