From 5fa428fda9eb0f333311eca20b9f08fef975a8c0 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 5 Apr 2004 01:27:26 +0000 Subject: [PATCH] Implement support for a new LLVM 1.3 bytecode format, which uses uint's to index into structure types and allows arbitrary 32- and 64-bit integer types to index into sequential types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@12651 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bytecode/Reader/ConstantReader.cpp | 15 ++ lib/Bytecode/Reader/InstructionReader.cpp | 33 +++- lib/Bytecode/Reader/Reader.cpp | 11 +- lib/Bytecode/Reader/ReaderInternals.h | 7 + lib/Bytecode/Writer/InstructionWriter.cpp | 206 ++++++++++++++-------- lib/Bytecode/Writer/Writer.cpp | 6 +- 6 files changed, 191 insertions(+), 87 deletions(-) diff --git a/lib/Bytecode/Reader/ConstantReader.cpp b/lib/Bytecode/Reader/ConstantReader.cpp index b4a219df6b2..8691b26544b 100644 --- a/lib/Bytecode/Reader/ConstantReader.cpp +++ b/lib/Bytecode/Reader/ConstantReader.cpp @@ -15,6 +15,7 @@ #include "ReaderInternals.h" #include "llvm/Module.h" #include "llvm/Constants.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" #include using namespace llvm; @@ -164,6 +165,20 @@ Constant *BytecodeParser::parseConstantValue(const unsigned char *&Buf, return ConstantExpr::getCast(ArgVec[0], getType(TypeID)); } else if (Opcode == Instruction::GetElementPtr) { // GetElementPtr std::vector IdxList(ArgVec.begin()+1, ArgVec.end()); + + if (hasRestrictedGEPTypes) { + const Type *BaseTy = ArgVec[0]->getType(); + generic_gep_type_iterator::iterator> + GTI = gep_type_begin(BaseTy, IdxList.begin(), IdxList.end()), + E = gep_type_end(BaseTy, IdxList.begin(), IdxList.end()); + for (unsigned i = 0; GTI != E; ++GTI, ++i) + if (isa(*GTI)) { + if (IdxList[i]->getType() != Type::UByteTy) + throw std::string("Invalid index for getelementptr!"); + IdxList[i] = ConstantExpr::getCast(IdxList[i], Type::UIntTy); + } + } + return ConstantExpr::getGetElementPtr(ArgVec[0], IdxList); } else if (Opcode == Instruction::Select) { assert(ArgVec.size() == 3); diff --git a/lib/Bytecode/Reader/InstructionReader.cpp b/lib/Bytecode/Reader/InstructionReader.cpp index 90be8cd6f5a..d66b12cf0d9 100644 --- a/lib/Bytecode/Reader/InstructionReader.cpp +++ b/lib/Bytecode/Reader/InstructionReader.cpp @@ -308,10 +308,35 @@ void BytecodeParser::ParseInstruction(const unsigned char *&Buf, for (unsigned i = 1, e = Args.size(); i != e; ++i) { const CompositeType *TopTy = dyn_cast_or_null(NextTy); if (!TopTy) throw std::string("Invalid getelementptr instruction!"); - // FIXME: when PR82 is resolved. - unsigned IdxTy = isa(TopTy) ? Type::UByteTyID :Type::LongTyID; - - Idx.push_back(getValue(IdxTy, Args[i])); + + unsigned ValIdx = Args[i]; + unsigned IdxTy; + if (!hasRestrictedGEPTypes) { + // Struct indices are always uints, sequential type indices can be any + // of the 32 or 64-bit integer types. The actual choice of type is + // encoded in the low two bits of the slot number. + if (isa(TopTy)) + IdxTy = Type::UIntTyID; + else { + switch (ValIdx & 3) { + case 0: IdxTy = Type::UIntTyID; break; + case 1: IdxTy = Type::IntTyID; break; + case 2: IdxTy = Type::ULongTyID; break; + case 3: IdxTy = Type::LongTyID; break; + } + ValIdx >>= 2; + } + } else { + IdxTy = isa(TopTy) ? Type::UByteTyID : Type::LongTyID; + } + + Idx.push_back(getValue(IdxTy, ValIdx)); + + // Convert ubyte struct indices into uint struct indices. + if (isa(TopTy) && hasRestrictedGEPTypes) + if (ConstantUInt *C = dyn_cast(Idx.back())) + Idx[Idx.size()-1] = ConstantExpr::getCast(C, Type::UIntTy); + NextTy = GetElementPtrInst::getIndexedType(InstTy, Idx, true); } diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp index 54c91811a9b..2f0879ba394 100644 --- a/lib/Bytecode/Reader/Reader.cpp +++ b/lib/Bytecode/Reader/Reader.cpp @@ -647,12 +647,10 @@ void BytecodeParser::ParseVersionInfo(const unsigned char *&Buf, // Default values for the current bytecode version hasInconsistentModuleGlobalInfo = false; hasExplicitPrimitiveZeros = false; + hasRestrictedGEPTypes = false; switch (RevisionNum) { case 0: // LLVM 1.0, 1.1 release version - // Compared to rev #2, we added support for weak linkage, a more dense - // encoding, and better varargs support. - // Base LLVM 1.0 bytecode format. hasInconsistentModuleGlobalInfo = true; hasExplicitPrimitiveZeros = true; @@ -663,6 +661,13 @@ void BytecodeParser::ParseVersionInfo(const unsigned char *&Buf, // Also, it fixed the problem where the size of the ModuleGlobalInfo block // included the size for the alignment at the end, where the rest of the // blocks did not. + + // LLVM 1.2 and before required that GEP indices be ubyte constants for + // structures and longs for sequential types. + hasRestrictedGEPTypes = true; + + // FALL THROUGH + case 2: // LLVM 1.3 release version break; default: diff --git a/lib/Bytecode/Reader/ReaderInternals.h b/lib/Bytecode/Reader/ReaderInternals.h index 86bf800ca6e..9e0ffc2c363 100644 --- a/lib/Bytecode/Reader/ReaderInternals.h +++ b/lib/Bytecode/Reader/ReaderInternals.h @@ -108,6 +108,13 @@ private: // int/sbyte/etc. bool hasExplicitPrimitiveZeros; + // Flags to control features specific the LLVM 1.2 and before (revision #1) + + // LLVM 1.2 and earlier required that getelementptr structure indices were + // ubyte constants and that sequential type indices were longs. + bool hasRestrictedGEPTypes; + + typedef std::vector ValueTable; ValueTable Values; ValueTable ModuleValues; diff --git a/lib/Bytecode/Writer/InstructionWriter.cpp b/lib/Bytecode/Writer/InstructionWriter.cpp index e86b027135b..9e063510dfe 100644 --- a/lib/Bytecode/Writer/InstructionWriter.cpp +++ b/lib/Bytecode/Writer/InstructionWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/Module.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" #include "Support/Statistic.h" #include using namespace llvm; @@ -38,20 +39,48 @@ static void outputInstructionFormat0(const Instruction *I, unsigned Opcode, output_vbr(NumArgs + (isa(I) || isa(I) || isa(I)), Out); - for (unsigned i = 0; i < NumArgs; ++i) { - int Slot = Table.getSlot(I->getOperand(i)); - assert(Slot >= 0 && "No slot number for value!?!?"); - output_vbr((unsigned)Slot, Out); - } + if (!isa(&I)) { + for (unsigned i = 0; i < NumArgs; ++i) { + int Slot = Table.getSlot(I->getOperand(i)); + assert(Slot >= 0 && "No slot number for value!?!?"); + output_vbr((unsigned)Slot, Out); + } - if (isa(I) || isa(I)) { - int Slot = Table.getSlot(I->getType()); - assert(Slot != -1 && "Cast return type unknown?"); - output_vbr((unsigned)Slot, Out); - } else if (const VANextInst *VAI = dyn_cast(I)) { - int Slot = Table.getSlot(VAI->getArgType()); - assert(Slot != -1 && "VarArg argument type unknown?"); - output_vbr((unsigned)Slot, Out); + if (isa(I) || isa(I)) { + int Slot = Table.getSlot(I->getType()); + assert(Slot != -1 && "Cast return type unknown?"); + output_vbr((unsigned)Slot, Out); + } else if (const VANextInst *VAI = dyn_cast(I)) { + int Slot = Table.getSlot(VAI->getArgType()); + assert(Slot != -1 && "VarArg argument type unknown?"); + output_vbr((unsigned)Slot, Out); + } + + } else { + int Slot = Table.getSlot(I->getOperand(0)); + assert(Slot >= 0 && "No slot number for value!?!?"); + output_vbr(unsigned(Slot), Out); + + // We need to encode the type of sequential type indices into their slot # + unsigned Idx = 1; + for (gep_type_iterator TI = gep_type_begin(I), E = gep_type_end(I); + Idx != NumArgs; ++TI, ++Idx) { + Slot = Table.getSlot(I->getOperand(Idx)); + assert(Slot >= 0 && "No slot number for value!?!?"); + + if (isa(*TI)) { + unsigned IdxId; + switch (I->getOperand(Idx)->getType()->getPrimitiveID()) { + default: assert(0 && "Unknown index type!"); + case Type::UIntTyID: IdxId = 0; break; + case Type::IntTyID: IdxId = 1; break; + case Type::ULongTyID: IdxId = 2; break; + case Type::LongTyID: IdxId = 3; break; + } + Slot = (Slot << 2) | IdxId; + } + output_vbr(unsigned(Slot), Out); + } } align32(Out); // We must maintain correct alignment! @@ -119,8 +148,9 @@ static void outputInstrVarArgsCall(const Instruction *I, unsigned Opcode, // operand index is >= 2^12. // static void outputInstructionFormat1(const Instruction *I, unsigned Opcode, - const SlotCalculator &Table, int *Slots, - unsigned Type, std::deque &Out) { + const SlotCalculator &Table, + unsigned *Slots, unsigned Type, + std::deque &Out) { // bits Instruction format: // -------------------------- // 01-00: Opcode type, fixed to 1. @@ -138,8 +168,9 @@ static void outputInstructionFormat1(const Instruction *I, unsigned Opcode, // operand index is >= 2^8. // static void outputInstructionFormat2(const Instruction *I, unsigned Opcode, - const SlotCalculator &Table, int *Slots, - unsigned Type, std::deque &Out) { + const SlotCalculator &Table, + unsigned *Slots, unsigned Type, + std::deque &Out) { // bits Instruction format: // -------------------------- // 01-00: Opcode type, fixed to 2. @@ -160,8 +191,9 @@ static void outputInstructionFormat2(const Instruction *I, unsigned Opcode, // operand index is >= 2^6. // static void outputInstructionFormat3(const Instruction *I, unsigned Opcode, - const SlotCalculator &Table, int *Slots, - unsigned Type, std::deque &Out) { + const SlotCalculator &Table, + unsigned *Slots, unsigned Type, + std::deque &Out) { // bits Instruction format: // -------------------------- // 01-00: Opcode type, fixed to 3. @@ -181,6 +213,7 @@ static void outputInstructionFormat3(const Instruction *I, unsigned Opcode, void BytecodeWriter::outputInstruction(const Instruction &I) { assert(I.getOpcode() < 62 && "Opcode too big???"); unsigned Opcode = I.getOpcode(); + unsigned NumOperands = I.getNumOperands(); // Encode 'volatile load' as 62 and 'volatile store' as 63. if (isa(I) && cast(I).isVolatile()) @@ -188,17 +221,6 @@ void BytecodeWriter::outputInstruction(const Instruction &I) { if (isa(I) && cast(I).isVolatile()) Opcode = 63; - unsigned NumOperands = I.getNumOperands(); - int MaxOpSlot = 0; - int Slots[3]; Slots[0] = (1 << 12)-1; // Marker to signify 0 operands - - for (unsigned i = 0; i != NumOperands; ++i) { - int slot = Table.getSlot(I.getOperand(i)); - assert(slot != -1 && "Broken bytecode!"); - if (slot > MaxOpSlot) MaxOpSlot = slot; - if (i < 3) Slots[i] = slot; - } - // Figure out which type to encode with the instruction. Typically we want // the type of the first parameter, as opposed to the type of the instruction // (for example, with setcc, we always know it returns bool, but the type of @@ -226,71 +248,101 @@ void BytecodeWriter::outputInstruction(const Instruction &I) { assert(Slot != -1 && "Type not available!!?!"); Type = (unsigned)Slot; - // Make sure that we take the type number into consideration. We don't want - // to overflow the field size for the instruction format we select. - // - if (Slot > MaxOpSlot) MaxOpSlot = Slot; - - // Handle the special case for cast... - if (isa(I) || isa(I)) { - // Cast has to encode the destination type as the second argument in the - // packet, or else we won't know what type to cast to! - Slots[1] = Table.getSlot(I.getType()); - assert(Slots[1] != -1 && "Cast return type unknown?"); - if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; - NumOperands++; - } else if (const VANextInst *VANI = dyn_cast(&I)) { - Slots[1] = Table.getSlot(VANI->getArgType()); - assert(Slots[1] != -1 && "va_next return type unknown?"); - if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; - NumOperands++; - } else if (const CallInst *CI = dyn_cast(&I)){// Handle VarArg calls - const PointerType *Ty = cast(CI->getCalledValue()->getType()); + // Varargs calls and invokes are encoded entirely different from any other + // instructions. + if (const CallInst *CI = dyn_cast(&I)){ + const PointerType *Ty =cast(CI->getCalledValue()->getType()); if (cast(Ty->getElementType())->isVarArg()) { outputInstrVarArgsCall(CI, Opcode, Table, Type, Out); return; } - } else if (const InvokeInst *II = dyn_cast(&I)) {// ... & Invokes - const PointerType *Ty = cast(II->getCalledValue()->getType()); + } else if (const InvokeInst *II = dyn_cast(&I)) { + const PointerType *Ty =cast(II->getCalledValue()->getType()); if (cast(Ty->getElementType())->isVarArg()) { outputInstrVarArgsCall(II, Opcode, Table, Type, Out); return; } } - // Decide which instruction encoding to use. This is determined primarily by - // the number of operands, and secondarily by whether or not the max operand - // will fit into the instruction encoding. More operands == fewer bits per - // operand. - // - switch (NumOperands) { - case 0: - case 1: - if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops - outputInstructionFormat1(&I, Opcode, Table, Slots, Type, Out); - return; + if (NumOperands <= 3) { + // Make sure that we take the type number into consideration. We don't want + // to overflow the field size for the instruction format we select. + // + unsigned MaxOpSlot = Type; + unsigned Slots[3]; Slots[0] = (1 << 12)-1; // Marker to signify 0 operands + + for (unsigned i = 0; i != NumOperands; ++i) { + int slot = Table.getSlot(I.getOperand(i)); + assert(slot != -1 && "Broken bytecode!"); + if (unsigned(slot) > MaxOpSlot) MaxOpSlot = unsigned(slot); + Slots[i] = unsigned(slot); } - break; - case 2: - if (MaxOpSlot < (1 << 8)) { - outputInstructionFormat2(&I, Opcode, Table, Slots, Type, Out); - return; + // Handle the special cases for various instructions... + if (isa(I) || isa(I)) { + // Cast has to encode the destination type as the second argument in the + // packet, or else we won't know what type to cast to! + Slots[1] = Table.getSlot(I.getType()); + assert(Slots[1] != ~0U && "Cast return type unknown?"); + if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; + NumOperands++; + } else if (const VANextInst *VANI = dyn_cast(&I)) { + Slots[1] = Table.getSlot(VANI->getArgType()); + assert(Slots[1] != ~0U && "va_next return type unknown?"); + if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; + NumOperands++; + } else if (const GetElementPtrInst *GEP = dyn_cast(&I)) { + // We need to encode the type of sequential type indices into their slot # + unsigned Idx = 1; + for (gep_type_iterator I = gep_type_begin(GEP), E = gep_type_end(GEP); + I != E; ++I, ++Idx) + if (isa(*I)) { + unsigned IdxId; + switch (GEP->getOperand(Idx)->getType()->getPrimitiveID()) { + default: assert(0 && "Unknown index type!"); + case Type::UIntTyID: IdxId = 0; break; + case Type::IntTyID: IdxId = 1; break; + case Type::ULongTyID: IdxId = 2; break; + case Type::LongTyID: IdxId = 3; break; + } + Slots[Idx] = (Slots[Idx] << 2) | IdxId; + if (Slots[Idx] > MaxOpSlot) MaxOpSlot = Slots[Idx]; + } } - break; - case 3: - if (MaxOpSlot < (1 << 6)) { - outputInstructionFormat3(&I, Opcode, Table, Slots, Type, Out); - return; + // Decide which instruction encoding to use. This is determined primarily + // by the number of operands, and secondarily by whether or not the max + // operand will fit into the instruction encoding. More operands == fewer + // bits per operand. + // + switch (NumOperands) { + case 0: + case 1: + if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops + outputInstructionFormat1(&I, Opcode, Table, Slots, Type, Out); + return; + } + break; + + case 2: + if (MaxOpSlot < (1 << 8)) { + outputInstructionFormat2(&I, Opcode, Table, Slots, Type, Out); + return; + } + break; + + case 3: + if (MaxOpSlot < (1 << 6)) { + outputInstructionFormat3(&I, Opcode, Table, Slots, Type, Out); + return; + } + break; + default: + break; } - break; - default: - break; } // If we weren't handled before here, we either have a large number of // operands or a large operand index that we are referring to. outputInstructionFormat0(&I, Opcode, Table, Type, Out); } - diff --git a/lib/Bytecode/Writer/Writer.cpp b/lib/Bytecode/Writer/Writer.cpp index 432a39fb330..82fe40db18c 100644 --- a/lib/Bytecode/Writer/Writer.cpp +++ b/lib/Bytecode/Writer/Writer.cpp @@ -54,9 +54,9 @@ BytecodeWriter::BytecodeWriter(std::deque &o, const Module *M) bool hasNoEndianness = M->getEndianness() == Module::AnyEndianness; bool hasNoPointerSize = M->getPointerSize() == Module::AnyPointerSize; - // Output the version identifier... we are currently on bytecode version #1, - // which corresponds to LLVM v1.2. - unsigned Version = (1 << 4) | isBigEndian | (hasLongPointers << 1) | + // Output the version identifier... we are currently on bytecode version #2, + // which corresponds to LLVM v1.3. + unsigned Version = (2 << 4) | isBigEndian | (hasLongPointers << 1) | (hasNoEndianness << 2) | (hasNoPointerSize << 3); output_vbr(Version, Out); align32(Out);