Add support for compressed bytecode

llvm-svn: 17535
This commit is contained in:
Reid Spencer 2004-11-06 23:17:23 +00:00
parent a7081d932b
commit a81994464f
5 changed files with 138 additions and 27 deletions

View File

@ -28,7 +28,8 @@
namespace llvm { namespace llvm {
class Module; class Module;
void WriteBytecodeToFile(const Module *M, std::ostream &Out); void WriteBytecodeToFile(const Module *M, std::ostream &Out,
bool compress = false);
} // End llvm namespace } // End llvm namespace
#endif #endif

View File

@ -24,6 +24,7 @@
#include "llvm/SymbolTable.h" #include "llvm/SymbolTable.h"
#include "llvm/Bytecode/Format.h" #include "llvm/Bytecode/Format.h"
#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/Compressor.h"
#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringExtras.h"
#include <sstream> #include <sstream>
#include <algorithm> #include <algorithm>
@ -2152,6 +2153,22 @@ void BytecodeReader::ParseModule() {
error("Function declared, but bytecode stream ended before definition"); error("Function declared, but bytecode stream ended before definition");
} }
/// Callback passed to Compressor::decompress to obtain output space.
///
/// \p ctxt points at a BytecodeReader::BufferInfo. Every call doubles the
/// size recorded there. The first call (no buffer yet) allocates the whole
/// doubled size and hands all of it out. Later calls grow the existing
/// buffer and hand out only the newly added second half, so bytes already
/// written to the front of the buffer are preserved.
///
/// On failure the BufferInfo is left exactly as it was, so whatever buffer
/// it already holds can still be released by its owner.
///
/// @param buff receives the start of the writable region (0 on failure)
/// @param sz   receives the number of writable bytes at \p buff (0 on failure)
/// @param ctxt the BufferInfo to grow
/// @returns 0 on success, 1 if no space could be provided
static unsigned GetUncompressionBuffer(char*&buff, unsigned& sz, void* ctxt){
  BytecodeReader::BufferInfo* bi =
    static_cast<BytecodeReader::BufferInfo*>(ctxt);

  // Report "nothing available" unless we succeed below.
  buff = 0;
  sz = 0;
  if (bi == 0)
    return 1;

  const unsigned old_size = bi->size;
  const unsigned new_size = old_size * 2;

  // A doubled size that is not strictly larger means old_size was zero or
  // the multiplication wrapped around; neither yields usable space.
  if (new_size <= old_size)
    return 1;

  if (bi->buff == 0) {
    // First request: allocate the full doubled size and expose all of it.
    char* fresh = static_cast<char*>(::malloc(new_size));
    if (fresh == 0)
      return 1;
    bi->buff = fresh;
    buff = fresh;
    sz = new_size;
  } else {
    // Grow through a temporary: if realloc fails the original block is still
    // valid and must not be lost by overwriting bi->buff with null.
    char* grown = static_cast<char*>(::realloc(bi->buff, new_size));
    if (grown == 0)
      return 1;
    bi->buff = grown;
    // Only the freshly added tail is free for the decompressor to fill.
    buff = grown + old_size;
    sz = old_size;
  }

  bi->size = new_size;
  return 0;
}
/// This function completely parses a bytecode buffer given by the \p Buf /// This function completely parses a bytecode buffer given by the \p Buf
/// and \p Length parameters. /// and \p Length parameters.
void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length, void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
@ -2167,9 +2184,25 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
if (Handler) Handler->handleStart(TheModule, Length); if (Handler) Handler->handleStart(TheModule, Length);
// Read and check signature... // Read and check signature...
unsigned Sig = read_uint(); bool compressed =
if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { (Buf[0] == 0xEC && Buf[1] == 0xEC && Buf[2] == 0xF6 && Buf[3] == 0xED);
error("Invalid bytecode signature: " + utostr(Sig));
if (compressed) {
bi.size = Length * 2;;
// Bytecode is compressed, have to decompress it first.
unsigned uncompressedLength = Compressor::decompress((char*)Buf+4,Length-4,
GetUncompressionBuffer, (void*) &bi);
At = MemStart = BlockStart = Buf = (BufPtr) bi.buff;
MemEnd = BlockEnd = Buf + uncompressedLength;
} else {
if (!(Buf[0] == 'l' && Buf[1] == 'l' && Buf[2] == 'v' && Buf[3] == 'm'))
error("Invalid bytecode signature: " +
utohexstr(Buf[0]) + utohexstr(Buf[1]) + utohexstr(Buf[2]) +
utohexstr(Buf[3]));
else
At += 4; // skip the bytes
} }
// Tell the handler we're starting a module // Tell the handler we're starting a module
@ -2215,6 +2248,8 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
freeState(); freeState();
delete TheModule; delete TheModule;
TheModule = 0; TheModule = 0;
if (bi.buff != 0 )
::free(bi.buff);
throw; throw;
} catch (...) { } catch (...) {
std::string msg("Unknown Exception Occurred"); std::string msg("Unknown Exception Occurred");
@ -2222,6 +2257,8 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
freeState(); freeState();
delete TheModule; delete TheModule;
TheModule = 0; TheModule = 0;
if (bi.buff != 0 )
::free(bi.buff);
throw msg; throw msg;
} }
} }

View File

@ -47,10 +47,14 @@ public:
BytecodeReader( BytecodeReader(
BytecodeHandler* h = 0 BytecodeHandler* h = 0
) { ) {
Handler = h; Handler = h;
} }
~BytecodeReader() { freeState(); } ~BytecodeReader() {
freeState();
if (bi.buff != 0)
::free(bi.buff);
}
/// @} /// @}
/// @name Types /// @name Types
@ -63,6 +67,13 @@ public:
/// @brief The type used for a vector of potentially abstract types /// @brief The type used for a vector of potentially abstract types
typedef std::vector<PATypeHolder> TypeListTy; typedef std::vector<PATypeHolder> TypeListTy;
/// @brief Bookkeeping for the buffer used while decompressing bytecode
struct BufferInfo {
  char* buff;     ///< Buffer pointer; 0 when default-constructed
  unsigned size;  ///< Size value tracked alongside buff; 0 when default-constructed

  /// Start out with no buffer and a size of zero.
  BufferInfo() : buff(0), size(0) {}
};
/// This type provides a vector of Value* via the User class for /// This type provides a vector of Value* via the User class for
/// storage of Values that have been constructed when reading the /// storage of Values that have been constructed when reading the
/// bytecode. Because of forward referencing, constant replacement /// bytecode. Because of forward referencing, constant replacement
@ -235,6 +246,8 @@ protected:
/// @name Data /// @name Data
/// @{ /// @{
private: private:
BufferInfo bi; ///< Buffer info for decompression
BufPtr MemStart; ///< Start of the memory buffer BufPtr MemStart; ///< Start of the memory buffer
BufPtr MemEnd; ///< End of the memory buffer BufPtr MemEnd; ///< End of the memory buffer
BufPtr BlockStart; ///< Start of current block being parsed BufPtr BlockStart; ///< Start of current block being parsed

View File

@ -25,6 +25,7 @@
#include "llvm/Module.h" #include "llvm/Module.h"
#include "llvm/SymbolTable.h" #include "llvm/SymbolTable.h"
#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/Compressor.h"
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Statistic.h"
#include <cstring> #include <cstring>
@ -1085,36 +1086,92 @@ void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) {
} }
} }
void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out) { struct CompressionContext {
char* chunk;
unsigned sz;
unsigned written;
std::ostream* Out;
};
/// Callback passed to Compressor::compress to obtain output space.
///
/// If the context already holds a chunk with a non-zero size, that chunk is
/// treated as full: it is written to the context's stream and its size is
/// added to the running `written` total. The callback then hands back a
/// 1 MiB chunk for the next piece of output. The same chunk is reused
/// between calls instead of being freed and reallocated each time.
///
/// @param buffer  receives the start of the chunk (0 on failure)
/// @param size    receives the chunk size in bytes (0 on failure)
/// @param context the CompressionContext carrying chunk, sz, written and Out
/// @returns 0 on success, 1 if a chunk could not be allocated
static unsigned WriteCompressedData(char*&buffer, unsigned& size, void* context) {
  CompressionContext* ctxt = static_cast<CompressionContext*>(context);
  const unsigned ChunkSize = 1024*1024;

  // Flush the previously handed-out chunk before giving out space again.
  if (ctxt->chunk != 0 && ctxt->sz > 0 ) {
    ctxt->Out->write(ctxt->chunk,ctxt->sz);
    ctxt->written += ctxt->sz;
  }

  // Only a chunk of exactly ChunkSize bytes can be reused; anything else is
  // dropped so the size reported below always matches the allocation.
  if (ctxt->chunk != 0 && ctxt->sz != ChunkSize) {
    delete [] ctxt->chunk;
    ctxt->chunk = 0;
  }

  if (ctxt->chunk == 0) {
    // operator new[] signals failure by throwing, not by returning null.
    // Trap it here so the failure is reported through the return value and
    // the context never refers to freed or missing memory.
    try {
      ctxt->chunk = new char [ChunkSize];
    } catch (...) {
      ctxt->chunk = 0;
      ctxt->sz = 0;
      buffer = 0;
      size = 0;
      return 1;
    }
  }

  size = ctxt->sz = ChunkSize;
  buffer = ctxt->chunk;
  return 0;
}
void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out,
bool compress ) {
assert(M && "You can't write a null module!!"); assert(M && "You can't write a null module!!");
// Create a vector of unsigned char for the bytecode output. We
// reserve 256KBytes of space in the vector so that we avoid doing
// lots of little allocations. 256KBytes is sufficient for a large
// proportion of the bytecode files we will encounter. Larger files
// will be automatically doubled in size as needed (std::vector
// behavior).
std::vector<unsigned char> Buffer; std::vector<unsigned char> Buffer;
Buffer.reserve(64 * 1024); // avoid lots of little reallocs Buffer.reserve(256 * 1024);
// This object populates buffer for us... // The BytecodeWriter populates Buffer for us.
BytecodeWriter BCW(Buffer, M); BytecodeWriter BCW(Buffer, M);
// Keep track of how much we've written... // Keep track of how much we've written
BytesWritten += Buffer.size(); BytesWritten += Buffer.size();
// Okay, write the deque out to the ostream now... the deque is not // Determine start and end points of the Buffer
// sequential in memory, however, so write out as much as possible in big std::vector<unsigned char>::iterator I = Buffer.begin();
// chunks, until we're done. const unsigned char *FirstByte = &(*I);
// const unsigned char *LastByte = FirstByte + Buffer.size();
for (std::vector<unsigned char>::const_iterator I = Buffer.begin(),
E = Buffer.end(); I != E; ) { // If we're supposed to compress this mess ...
// Scan to see how big this chunk is... if (compress) {
const unsigned char *ChunkPtr = &*I;
const unsigned char *LastPtr = ChunkPtr; // We signal compression by using an alternate magic number for the
while (I != E) { // file. The compressed bytecode file's magic number is the same as
const unsigned char *ThisPtr = &*++I; // the uncompressed one but with the high bits set. So, "llvm", which
if (++LastPtr != ThisPtr) // Advanced by more than a byte of memory? // is 0x6C 0x6C 0x76 0x6D becomes 0xEC 0xEC 0xF6 0xED
break; unsigned char compressed_magic[4];
compressed_magic[0] = 0xEC; // 'l' + 0x80
compressed_magic[1] = 0xEC; // 'l' + 0x80
compressed_magic[2] = 0xF6; // 'v' + 0x80
compressed_magic[3] = 0xED; // 'm' + 0x80
Out.write((char*)compressed_magic,4);
// Do the compression, writing as we go.
CompressionContext ctxt;
ctxt.chunk = 0;
ctxt.sz = 0;
ctxt.written = 0;
ctxt.Out = &Out;
// Compress everything after the magic number (which we'll alter)
uint64_t zipSize = Compressor::compress(
(char*)(FirstByte+4), // Skip the magic number
Buffer.size()-4, // Skip the magic number
WriteCompressedData, // use this function to allocate / write
Compressor::COMP_TYPE_BZIP2, // Try bzip2 compression first
(void*)&ctxt // Keep track of allocated memory
);
if (ctxt.chunk && ctxt.sz > 0) {
Out.write(ctxt.chunk, zipSize - ctxt.written);
delete [] ctxt.chunk;
} }
} else {
// Write out the chunk...
Out.write((char*)ChunkPtr, unsigned(LastPtr-ChunkPtr)); // We're not compressing, so just write the entire block.
Out.write((char*)FirstByte, LastByte-FirstByte);
} }
// make sure it hits disk now
Out.flush(); Out.flush();
} }

View File

@ -40,6 +40,9 @@ Force("f", cl::desc("Overwrite output files"));
static cl::opt<bool> static cl::opt<bool>
DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden); DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);
static cl::opt<bool> Compress("compress", cl::Optional,
cl::desc("Compress the generated bytecode"));
static cl::opt<bool> static cl::opt<bool>
DisableVerify("disable-verify", cl::Hidden, DisableVerify("disable-verify", cl::Hidden,
cl::desc("Do not run verifier on input LLVM (dangerous!)")); cl::desc("Do not run verifier on input LLVM (dangerous!)"));
@ -119,7 +122,7 @@ int main(int argc, char **argv) {
return 1; return 1;
} }
WriteBytecodeToFile(M.get(), *Out); WriteBytecodeToFile(M.get(), *Out, Compress);
} catch (const ParseException &E) { } catch (const ParseException &E) {
std::cerr << argv[0] << ": " << E.getMessage() << "\n"; std::cerr << argv[0] << ": " << E.getMessage() << "\n";
return 1; return 1;