Add support for compressed bytecode

llvm-svn: 17535
This commit is contained in:
Reid Spencer 2004-11-06 23:17:23 +00:00
parent a7081d932b
commit a81994464f
5 changed files with 138 additions and 27 deletions

View File

@ -28,7 +28,8 @@
namespace llvm {
class Module;
void WriteBytecodeToFile(const Module *M, std::ostream &Out);
void WriteBytecodeToFile(const Module *M, std::ostream &Out,
bool compress = false);
} // End llvm namespace
#endif

View File

@ -24,6 +24,7 @@
#include "llvm/SymbolTable.h"
#include "llvm/Bytecode/Format.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/Compressor.h"
#include "llvm/ADT/StringExtras.h"
#include <sstream>
#include <algorithm>
@ -2152,6 +2153,22 @@ void BytecodeReader::ParseModule() {
error("Function declared, but bytecode stream ended before definition");
}
/// Buffer-supply callback handed to Compressor::decompress.
///
/// Each call doubles the size recorded in the BufferInfo and hands the
/// decompressor the newly available region: the whole allocation on the first
/// call (when no buffer exists yet), or the freshly added second half on every
/// later call, so that the decompressed bytes end up contiguous in bi->buff.
///
/// @param buff Receives the start of the region the decompressor may fill;
///             set to null on failure.
/// @param sz   Receives the size in bytes of that region; set to 0 on failure.
/// @param ctxt Pointer to the BytecodeReader::BufferInfo being grown.
/// @returns 0 on success, 1 if more memory could not be obtained.
///
/// On failure the BufferInfo is left untouched, so whatever buffer it already
/// holds is still valid and still has to be released by its owner.
///
/// NOTE(review): when bi->buff is already non-null on the first call of a
/// decompression run, the region handed out starts at offset bi->size instead
/// of at the start of the buffer -- confirm the caller always begins with a
/// null buffer.
static unsigned GetUncompressionBuffer(char*&buff, unsigned& sz, void* ctxt){
  BytecodeReader::BufferInfo* bi =
    reinterpret_cast<BytecodeReader::BufferInfo*>(ctxt);

  // Until an allocation succeeds there is nothing valid to hand out.
  buff = 0;
  sz = 0;

  // Fallback size used when the recorded size is zero; doubling zero would
  // otherwise produce an endless series of empty buffers.
  const unsigned MinSize = 4096;

  const unsigned old_size = bi->size;
  if (old_size > ~0U / 2)
    return 1; // doubling would wrap around the range of unsigned
  const unsigned new_size = old_size != 0 ? old_size * 2 : MinSize;

  if (bi->buff == 0) {
    char* fresh = static_cast<char*>(::malloc(new_size));
    if (fresh == 0)
      return 1;
    bi->buff = fresh;
    buff = fresh;
    sz = new_size;
  } else {
    // Keep the old pointer until realloc has succeeded: on failure realloc
    // leaves the original block allocated, and overwriting bi->buff with the
    // null result would leak it.
    char* grown = static_cast<char*>(::realloc(bi->buff, new_size));
    if (grown == 0)
      return 1;
    bi->buff = grown;
    buff = grown + old_size;
    sz = new_size - old_size;
  }
  bi->size = new_size;
  return 0;
}
/// This function completely parses a bytecode buffer given by the \p Buf
/// and \p Length parameters.
void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
@ -2167,9 +2184,25 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
if (Handler) Handler->handleStart(TheModule, Length);
// Read and check signature...
unsigned Sig = read_uint();
if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) {
error("Invalid bytecode signature: " + utostr(Sig));
bool compressed =
(Buf[0] == 0xEC && Buf[1] == 0xEC && Buf[2] == 0xF6 && Buf[3] == 0xED);
if (compressed) {
bi.size = Length * 2;;
// Bytecode is compressed, have to decompress it first.
unsigned uncompressedLength = Compressor::decompress((char*)Buf+4,Length-4,
GetUncompressionBuffer, (void*) &bi);
At = MemStart = BlockStart = Buf = (BufPtr) bi.buff;
MemEnd = BlockEnd = Buf + uncompressedLength;
} else {
if (!(Buf[0] == 'l' && Buf[1] == 'l' && Buf[2] == 'v' && Buf[3] == 'm'))
error("Invalid bytecode signature: " +
utohexstr(Buf[0]) + utohexstr(Buf[1]) + utohexstr(Buf[2]) +
utohexstr(Buf[3]));
else
At += 4; // skip the bytes
}
// Tell the handler we're starting a module
@ -2215,6 +2248,8 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
freeState();
delete TheModule;
TheModule = 0;
if (bi.buff != 0 )
::free(bi.buff);
throw;
} catch (...) {
std::string msg("Unknown Exception Occurred");
@ -2222,6 +2257,8 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
freeState();
delete TheModule;
TheModule = 0;
if (bi.buff != 0 )
::free(bi.buff);
throw msg;
}
}

View File

@ -47,10 +47,14 @@ public:
BytecodeReader(
BytecodeHandler* h = 0
) {
Handler = h;
Handler = h;
}
~BytecodeReader() { freeState(); }
~BytecodeReader() {
freeState();
if (bi.buff != 0)
::free(bi.buff);
}
/// @}
/// @name Types
@ -63,6 +67,13 @@ public:
/// @brief The type used for a vector of potentially abstract types
typedef std::vector<PATypeHolder> TypeListTy;
/// @brief Bookkeeping record for the buffer used while decompressing bytecode
struct BufferInfo {
  char* buff;    ///< Start of the decompression buffer; null until one exists
  unsigned size; ///< Byte count tracked alongside buff; starts out as zero

  /// Start with no buffer and a size of zero.
  BufferInfo() : buff(0), size(0) {}
};
/// This type provides a vector of Value* via the User class for
/// storage of Values that have been constructed when reading the
/// bytecode. Because of forward referencing, constant replacement
@ -235,6 +246,8 @@ protected:
/// @name Data
/// @{
private:
BufferInfo bi; ///< Buffer info for decompression
BufPtr MemStart; ///< Start of the memory buffer
BufPtr MemEnd; ///< End of the memory buffer
BufPtr BlockStart; ///< Start of current block being parsed

View File

@ -25,6 +25,7 @@
#include "llvm/Module.h"
#include "llvm/SymbolTable.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/Compressor.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include <cstring>
@ -1085,36 +1086,92 @@ void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) {
}
}
void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out) {
/// State shared between the code that starts a compression run and the
/// WriteCompressedData callback that streams the compressed bytes out.
struct CompressionContext {
  char* chunk;       ///< Buffer currently lent to the compressor, or null
  unsigned sz;       ///< Size in bytes of chunk (0 when there is no chunk)
  unsigned written;  ///< Bytes the callback has already written to Out
  std::ostream* Out; ///< Stream receiving the compressed bytes

  /// Start with no buffer, nothing written and no stream attached.
  CompressionContext() : chunk(0), sz(0), written(0), Out(0) {}
};

/// Output callback for the compressor: flush the chunk handed out by the
/// previous call (all ctxt->sz bytes of it) to the stream, then supply a
/// 1 MiB buffer for the next piece of compressed data.
///
/// The same buffer is handed out again after each flush instead of being
/// freed and reallocated. It remains owned by the context; whoever drives the
/// compression must write out the final, partially filled chunk and release
/// ctxt->chunk with delete [].
///
/// @param buffer  Receives the buffer to fill next; null on failure.
/// @param size    Receives the size of that buffer; 0 on failure.
/// @param context Pointer to the CompressionContext of this run.
/// @returns 0 on success, 1 if no buffer could be allocated.
static unsigned WriteCompressedData(char*&buffer, unsigned& size, void* context) {
  static const unsigned ChunkSize = 1024*1024;
  CompressionContext* ctxt = reinterpret_cast<CompressionContext*>(context);

  // Flush whatever the compressor put into the chunk we lent it last time.
  if (ctxt->chunk != 0 && ctxt->sz > 0 ) {
    ctxt->Out->write(ctxt->chunk,ctxt->sz);
    ctxt->written += ctxt->sz;
  }

  // Allocate only when there is no usable full-size chunk to hand out again.
  if (ctxt->chunk == 0 || ctxt->sz != ChunkSize) {
    delete [] ctxt->chunk;
    ctxt->chunk = 0;
    ctxt->sz = 0;
    // Plain new[] reports failure by throwing, never by returning null.
    // Convert that into the error return this callback is expected to use
    // rather than letting an exception unwind through the compressor.
    try {
      ctxt->chunk = new char [ChunkSize];
    } catch (...) {
      ctxt->chunk = 0;
    }
    if (ctxt->chunk == 0) {
      buffer = 0;
      size = 0;
      return 1;
    }
    ctxt->sz = ChunkSize;
  }

  buffer = ctxt->chunk;
  size = ctxt->sz;
  return 0;
}
/// Serialize module \p M as bytecode and write the result to \p Out.
///
/// The module is first rendered into an in-memory byte vector by
/// BytecodeWriter. When \p compress is true, a four-byte alternate magic
/// number (0xEC 0xEC 0xF6 0xED) is written and everything after the first
/// four bytes of the rendered bytecode is passed to Compressor::compress,
/// which delivers its output through WriteCompressedData; otherwise the
/// rendered bytes are written unchanged. The stream is flushed before
/// returning.
///
/// \param M        Module to write; must not be null (asserted).
/// \param Out      Stream that receives the bytecode.
/// \param compress Selects the compressed form of the output.
///
/// NOTE(review): BytesWritten is increased by the size of the uncompressed
/// buffer in both modes -- confirm that is the intended statistic.
/// NOTE(review): the compressed path computes Buffer.size()-4, which presumes
/// the rendered bytecode is at least four bytes long -- confirm.
void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out,
bool compress ) {
assert(M && "You can't write a null module!!");
// Create a vector of unsigned char for the bytecode output. We
// reserve 256KBytes of space in the vector so that we avoid doing
// lots of little allocations. 256KBytes is sufficient for a large
// proportion of the bytecode files we will encounter. Larger files
// will be automatically doubled in size as needed (std::vector
// behavior).
std::vector<unsigned char> Buffer;
Buffer.reserve(64 * 1024); // avoid lots of little reallocs
Buffer.reserve(256 * 1024);
// This object populates buffer for us...
// The BytecodeWriter populates Buffer for us.
BytecodeWriter BCW(Buffer, M);
// Keep track of how much we've written...
// Keep track of how much we've written
BytesWritten += Buffer.size();
// Okay, write the deque out to the ostream now... the deque is not
// sequential in memory, however, so write out as much as possible in big
// chunks, until we're done.
//
for (std::vector<unsigned char>::const_iterator I = Buffer.begin(),
E = Buffer.end(); I != E; ) {
// Scan to see how big this chunk is...
const unsigned char *ChunkPtr = &*I;
const unsigned char *LastPtr = ChunkPtr;
while (I != E) {
const unsigned char *ThisPtr = &*++I;
if (++LastPtr != ThisPtr) // Advanced by more than a byte of memory?
break;
// Determine start and end points of the Buffer
std::vector<unsigned char>::iterator I = Buffer.begin();
const unsigned char *FirstByte = &(*I);
const unsigned char *LastByte = FirstByte + Buffer.size();
// If we're supposed to compress this mess ...
if (compress) {
// We signal compression by using an alternate magic number for the
// file. The compressed bytecode file's magic number is the same as
// the uncompressed one but with the high bits set. So, "llvm", which
// is 0x6C 0x6C 0x76 0x6D becomes 0xEC 0xEC 0xF6 0xED
unsigned char compressed_magic[4];
compressed_magic[0] = 0xEC; // 'l' + 0x80
compressed_magic[1] = 0xEC; // 'l' + 0x80
compressed_magic[2] = 0xF6; // 'v' + 0x80
compressed_magic[3] = 0xED; // 'm' + 0x80
Out.write((char*)compressed_magic,4);
// Do the compression, writing as we go.
CompressionContext ctxt;
ctxt.chunk = 0;
ctxt.sz = 0;
ctxt.written = 0;
ctxt.Out = &Out;
// Compress everything after the magic number (which we'll alter)
uint64_t zipSize = Compressor::compress(
(char*)(FirstByte+4), // Skip the magic number
Buffer.size()-4, // Skip the magic number
WriteCompressedData, // use this function to allocate / write
Compressor::COMP_TYPE_BZIP2, // Try bzip2 compression first
(void*)&ctxt // Keep track of allocated memory
);
if (ctxt.chunk && ctxt.sz > 0) {
Out.write(ctxt.chunk, zipSize - ctxt.written);
delete [] ctxt.chunk;
}
// Write out the chunk...
Out.write((char*)ChunkPtr, unsigned(LastPtr-ChunkPtr));
} else {
// We're not compressing, so just write the entire block.
Out.write((char*)FirstByte, LastByte-FirstByte);
}
// make sure it hits disk now
Out.flush();
}

View File

@ -40,6 +40,9 @@ Force("f", cl::desc("Overwrite output files"));
static cl::opt<bool>
DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);
static cl::opt<bool> Compress("compress", cl::Optional,
cl::desc("Compress the generated bytecode"));
static cl::opt<bool>
DisableVerify("disable-verify", cl::Hidden,
cl::desc("Do not run verifier on input LLVM (dangerous!)"));
@ -119,7 +122,7 @@ int main(int argc, char **argv) {
return 1;
}
WriteBytecodeToFile(M.get(), *Out);
WriteBytecodeToFile(M.get(), *Out, Compress);
} catch (const ParseException &E) {
std::cerr << argv[0] << ": " << E.getMessage() << "\n";
return 1;