Add support for compressed bytecode

llvm-svn: 17535
2025-04-02 07:41:38 +00:00 · 2004-11-06 23:17:23 +00:00 · 2004-11-06 23:17:23 +00:00 · a81994464f
commit a81994464f
parent a7081d932b
5 changed files with 138 additions and 27 deletions
--- a/include/llvm/Bytecode/Writer.h
+++ b/include/llvm/Bytecode/Writer.h
@ -28,7 +28,8 @@

 namespace llvm {
  class Module;
-  void WriteBytecodeToFile(const Module *M, std::ostream &Out);
+  /// Write the bytecode for module \p M to the stream \p Out.
+  ///
+  /// @param M        The module to write; the implementation asserts it is
+  ///                 not null.
+  /// @param Out      The stream that receives the bytecode.
+  /// @param compress When true, the output starts with the alternate magic
+  ///                 number 0xEC 0xEC 0xF6 0xED and the rest of the bytecode
+  ///                 is passed through Compressor::compress. Defaults to
+  ///                 false, so existing two-argument callers are unaffected.
+  void WriteBytecodeToFile(const Module *M, std::ostream &Out, 
+                           bool compress = false);
 } // End llvm namespace

 #endif
--- a/lib/Bytecode/Reader/Reader.cpp
+++ b/lib/Bytecode/Reader/Reader.cpp
@ -24,6 +24,7 @@
 #include "llvm/SymbolTable.h"
 #include "llvm/Bytecode/Format.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/Compressor.h"
 #include "llvm/ADT/StringExtras.h"
 #include <sstream>
 #include <algorithm>
@ -2152,6 +2153,22 @@ void BytecodeReader::ParseModule() {
    error("Function declared, but bytecode stream ended before definition");
 }

+/// Growth callback handed to Compressor::decompress: makes more room in the
+/// reader's decompression buffer and reports where the new space starts.
+///
+/// Each successful call doubles the size recorded in the BufferInfo passed
+/// through \p ctxt. When no buffer exists yet, the whole new allocation is
+/// handed out. Otherwise the buffer is grown and only the freshly added tail
+/// is handed out, so the bytes produced so far stay contiguous at the front.
+///
+/// On failure the BufferInfo is left exactly as it was, so the existing
+/// buffer remains valid and can still be freed by its owner.
+///
+/// @param buff Set to the start of the writable region (0 on failure).
+/// @param sz   Set to the number of writable bytes at \p buff (0 on failure).
+/// @param ctxt Pointer to the BytecodeReader::BufferInfo being grown.
+/// @returns 0 on success, 1 if the buffer could not be grown.
+static unsigned GetUncompressionBuffer(char*&buff, unsigned& sz, void* ctxt){
+  BytecodeReader::BufferInfo* bi =
+    reinterpret_cast<BytecodeReader::BufferInfo*>(ctxt);
+
+  // Refuse to grow when doubling yields nothing usable: a zero size has no
+  // room to hand out, and a wrapped-around size would shrink the allocation
+  // while the region reported to the caller lies past its end.
+  const unsigned new_size = bi->size * 2;
+  if (new_size == 0 || new_size / 2 != bi->size) {
+    buff = 0;
+    sz = 0;
+    return 1;
+  }
+
+  // With no buffer yet, all of the new allocation is free space; otherwise
+  // the first bi->size bytes are treated as already in use.
+  const unsigned used = (bi->buff == 0) ? 0 : bi->size;
+
+  // realloc with a null pointer behaves like malloc. Keep the old pointer
+  // until the new one is known to be valid, so a failed realloc neither
+  // leaks the existing buffer nor leaves bi->buff null.
+  char* grown = static_cast<char*>(::realloc(bi->buff, new_size));
+  if (grown == 0) {
+    buff = 0;
+    sz = 0;
+    return 1;
+  }
+
+  bi->buff = grown;
+  bi->size = new_size;
+  buff = grown + used;
+  sz = new_size - used;
+  return 0;
+}
+
 /// This function completely parses a bytecode buffer given by the \p Buf
 /// and \p Length parameters.
 void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length, 
@ -2167,9 +2184,25 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
    if (Handler) Handler->handleStart(TheModule, Length);

    // Read and check signature...
-    unsigned Sig = read_uint();
-    if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) {
-      error("Invalid bytecode signature: " + utostr(Sig));
+    bool compressed = 
+      (Buf[0] == 0xEC && Buf[1] == 0xEC && Buf[2] == 0xF6 && Buf[3] == 0xED);
+
+    if (compressed) {
+      bi.size = Length * 2;;
+      // Bytecode is compressed, have to decompress it first.
+      unsigned uncompressedLength = Compressor::decompress((char*)Buf+4,Length-4,
+        GetUncompressionBuffer, (void*) &bi);
+
+      At = MemStart = BlockStart = Buf = (BufPtr) bi.buff;
+      MemEnd = BlockEnd = Buf + uncompressedLength;
+
+    } else {
+      if (!(Buf[0] == 'l' && Buf[1] == 'l' && Buf[2] == 'v' && Buf[3] == 'm'))
+        error("Invalid bytecode signature: " + 
+            utohexstr(Buf[0]) + utohexstr(Buf[1]) + utohexstr(Buf[2]) +
+            utohexstr(Buf[3]));
+      else
+        At += 4; // skip the bytes
    }

    // Tell the handler we're starting a module
@ -2215,6 +2248,8 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
    freeState();
    delete TheModule;
    TheModule = 0;
+    if (bi.buff != 0 )
+      ::free(bi.buff);
    throw;
  } catch (...) {
    std::string msg("Unknown Exception Occurred");
@ -2222,6 +2257,8 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
    freeState();
    delete TheModule;
    TheModule = 0;
+    if (bi.buff != 0 )
+      ::free(bi.buff);
    throw msg;
  }
 }
--- a/lib/Bytecode/Reader/Reader.h
+++ b/lib/Bytecode/Reader/Reader.h
@ -47,10 +47,14 @@ public:
  BytecodeReader( 
    BytecodeHandler* h = 0
  ) { 
-    Handler = h; 
+    Handler = h;
  }

-  ~BytecodeReader() { freeState(); }
+  /// Releases the parser state and the decompression buffer, if one was
+  /// allocated.
+  ///
+  /// NOTE(review): the catch handlers in ParseBytecode also free bi.buff but
+  /// do not reset it to 0, so after a failed parse this free would act on an
+  /// already-freed pointer. Confirm, and clear the pointer at those sites.
+  ~BytecodeReader() {
+    freeState();
+    // free() accepts a null pointer and does nothing, so no check is needed.
+    ::free(bi.buff);
+  }

 /// @}
 /// @name Types
@ -63,6 +67,13 @@ public:
  /// @brief The type used for a vector of potentially abstract types
  typedef std::vector<PATypeHolder> TypeListTy;

+  /// @brief An internal buffer object used for handling decompression
+  /// Bookkeeping for the heap buffer that receives decompressed bytecode.
+  /// A default-constructed BufferInfo describes "no buffer yet".
+  struct BufferInfo {
+    char* buff;     ///< Start of the malloc/realloc-managed buffer, or 0.
+    unsigned size;  ///< Size in bytes that the growth logic doubles next.
+    BufferInfo() : buff(0), size(0) {}
+  };
+
  /// This type provides a vector of Value* via the User class for
  /// storage of Values that have been constructed when reading the
  /// bytecode. Because of forward referencing, constant replacement
@ -235,6 +246,8 @@ protected:
 /// @name Data
 /// @{
 private:
+  BufferInfo bi;      ///< Buffer info for decompression
+
  BufPtr MemStart;     ///< Start of the memory buffer
  BufPtr MemEnd;       ///< End of the memory buffer
  BufPtr BlockStart;   ///< Start of current block being parsed
--- a/lib/Bytecode/Writer/Writer.cpp
+++ b/lib/Bytecode/Writer/Writer.cpp
@ -25,6 +25,7 @@
 #include "llvm/Module.h"
 #include "llvm/SymbolTable.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/Compressor.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include <cstring>
@ -1085,36 +1086,92 @@ void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) {
  }
 }

-void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out) {
+/// State shared between WriteBytecodeToFile and its WriteCompressedData
+/// callback while compressed output is streamed to a file.
+struct CompressionContext {
+  char* chunk;        ///< Chunk currently handed to the compressor (new[]'d), or 0.
+  unsigned sz;        ///< Size in bytes of \p chunk.
+  unsigned written;   ///< Total bytes of earlier chunks already written to Out.
+  std::ostream* Out;  ///< Stream that receives the compressed data.
+};
+
+/// Output callback handed to Compressor::compress: flushes the chunk the
+/// compressor has just filled and hands it a fresh one.
+///
+/// When the CompressionContext passed through \p context already holds a
+/// chunk, all ctxt->sz bytes of it are written to the output stream, the
+/// running total in ctxt->written is advanced, and the chunk is released.
+/// A new 1 MB chunk is then allocated and recorded in the context.
+///
+/// @param buffer  Set to the start of the new chunk (0 on failure).
+/// @param size    Set to the size of the new chunk in bytes (0 on failure).
+/// @param context Pointer to the CompressionContext for this write.
+/// @returns 0 on success, 1 if a new chunk could not be allocated. On
+///          failure the context holds no chunk, so it is neither written
+///          nor deleted again by the caller.
+static unsigned WriteCompressedData(char*&buffer, unsigned& size, void* context) {
+  CompressionContext* ctxt = reinterpret_cast<CompressionContext*>(context);
+
+  if (ctxt->chunk != 0) {
+    if (ctxt->sz > 0) {
+      ctxt->Out->write(ctxt->chunk,ctxt->sz);
+      ctxt->written += ctxt->sz;
+    }
+    delete [] ctxt->chunk;
+    // Clear the context right away so that it never refers to freed memory,
+    // whatever happens to the allocation below.
+    ctxt->chunk = 0;
+    ctxt->sz = 0;
+  }
+
+  const unsigned ChunkSize = 1024*1024;
+
+  // Plain new[] reports failure by throwing rather than by returning null,
+  // so the exception is translated into this callback's error return.
+  char* fresh = 0;
+  try {
+    fresh = new char [ChunkSize];
+  } catch (...) {
+    fresh = 0;
+  }
+  if (fresh == 0) {
+    buffer = 0;
+    size = 0;
+    return 1;
+  }
+
+  buffer = ctxt->chunk = fresh;
+  size = ctxt->sz = ChunkSize;
+  return 0;
+}
+
+/// Serialize module \p M as bytecode and write it to \p Out.
+///
+/// The module is first rendered into an in-memory buffer by BytecodeWriter.
+/// Without \p compress that buffer is written out verbatim. With \p compress
+/// the compressed-file magic number (0xEC 0xEC 0xF6 0xED) is written in
+/// place of the buffer's first four bytes, and everything after them is
+/// passed through Compressor::compress, whose output reaches \p Out in
+/// chunks via WriteCompressedData. The stream is flushed before returning.
+///
+/// @param M        Module to write; must not be null (asserted).
+/// @param Out      Destination stream.
+/// @param compress Whether to write the compressed form.
+void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out,
+                               bool compress ) {
   assert(M && "You can't write a null module!!");

+  // Create a vector of unsigned char for the bytecode output. We
+  // reserve 256KBytes of space in the vector so that we avoid doing
+  // lots of little allocations. 256KBytes is sufficient for a large
+  // proportion of the bytecode files we will encounter. Larger modules
+  // simply make the vector grow as needed (std::vector behavior).
   std::vector<unsigned char> Buffer;
-  Buffer.reserve(64 * 1024); // avoid lots of little reallocs
+  Buffer.reserve(256 * 1024);

-  // This object populates buffer for us...
+  // The BytecodeWriter populates Buffer for us.
   BytecodeWriter BCW(Buffer, M);

-  // Keep track of how much we've written...
+  // Keep track of how much bytecode was generated. This is the size of
+  // the uncompressed buffer, whether or not compression is requested.
   BytesWritten += Buffer.size();

-  // Okay, write the deque out to the ostream now... the deque is not
-  // sequential in memory, however, so write out as much as possible in big
-  // chunks, until we're done.
-  //
-  for (std::vector<unsigned char>::const_iterator I = Buffer.begin(),
-         E = Buffer.end(); I != E; ) {
-    // Scan to see how big this chunk is...
-    const unsigned char *ChunkPtr = &*I;
-    const unsigned char *LastPtr = ChunkPtr;
-    while (I != E) {
-      const unsigned char *ThisPtr = &*++I;
-      if (++LastPtr != ThisPtr) // Advanced by more than a byte of memory?
-        break;
+  // Determine start and end points of the Buffer. The vector's storage is
+  // contiguous, so one pointer range covers all of it.
+  // NOTE(review): this dereferences begin(), and the compressed path below
+  // subtracts 4 from the size; both assume BytecodeWriter produced at
+  // least the 4-byte signature. Confirm that this always holds.
+  std::vector<unsigned char>::iterator I = Buffer.begin();
+  const unsigned char *FirstByte = &(*I);
+  const unsigned char *LastByte = FirstByte + Buffer.size();
+
+  // If compression was requested ...
+  if (compress) {
+
+    // We signal compression by using an alternate magic number for the
+    // file. The compressed bytecode file's magic number is the same as
+    // the uncompressed one but with the high bit of each byte set. So,
+    // "llvm", which is 0x6C 0x6C 0x76 0x6D, becomes 0xEC 0xEC 0xF6 0xED.
+    unsigned char compressed_magic[4];
+    compressed_magic[0] = 0xEC; // 'l' + 0x80
+    compressed_magic[1] = 0xEC; // 'l' + 0x80
+    compressed_magic[2] = 0xF6; // 'v' + 0x80
+    compressed_magic[3] = 0xED; // 'm' + 0x80
+
+    Out.write((char*)compressed_magic,4);
+
+    // Do the compression, writing as we go. The context starts with no
+    // chunk; WriteCompressedData allocates one on its first call.
+    CompressionContext ctxt;
+    ctxt.chunk = 0;
+    ctxt.sz = 0;
+    ctxt.written = 0;
+    ctxt.Out = &Out;
+
+    // Compress everything after the uncompressed magic number, which the
+    // alternate magic number written above stands in for.
+    // NOTE(review): if Compressor::compress throws, ctxt.chunk is not
+    // deleted on the way out. Confirm whether it can throw.
+    uint64_t zipSize = Compressor::compress(
+      (char*)(FirstByte+4),        // Skip the magic number
+      Buffer.size()-4,             // Skip the magic number
+      WriteCompressedData,         // use this function to allocate / write
+      Compressor::COMP_TYPE_BZIP2, // Try bzip2 compression first
+      (void*)&ctxt                 // Keep track of allocated memory
+    );
+
+    // The callback only writes a chunk when it is asked for the next one,
+    // so the last chunk is still pending here. zipSize is treated as the
+    // total compressed size, which makes zipSize - ctxt.written the number
+    // of bytes held in that last chunk.
+    if (ctxt.chunk && ctxt.sz > 0) {
+      Out.write(ctxt.chunk, zipSize - ctxt.written);
+      delete [] ctxt.chunk;
     }
-    
-    // Write out the chunk...
-    Out.write((char*)ChunkPtr, unsigned(LastPtr-ChunkPtr));
+  } else {
+
+    // We're not compressing, so just write the entire block in one call.
+    Out.write((char*)FirstByte, LastByte-FirstByte);
+
   }
+
+  // Flush the stream so that nothing written above stays buffered in it.
   Out.flush();
 }

--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@ -40,6 +40,9 @@ Force("f", cl::desc("Overwrite output files"));
 static cl::opt<bool>
 DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);

+// -compress: main() passes this flag's value as the `compress` argument of
+// WriteBytecodeToFile when the assembled module is written out.
+static cl::opt<bool> Compress("compress", cl::Optional,
+       cl::desc("Compress the generated bytecode"));
+
 static cl::opt<bool>
 DisableVerify("disable-verify", cl::Hidden,
              cl::desc("Do not run verifier on input LLVM (dangerous!)"));
@ -119,7 +122,7 @@ int main(int argc, char **argv) {
      return 1;
    }
   
-    WriteBytecodeToFile(M.get(), *Out);
+    WriteBytecodeToFile(M.get(), *Out, Compress);
  } catch (const ParseException &E) {
    std::cerr << argv[0] << ": " << E.getMessage() << "\n";
    return 1;