Allow the MCDisassembler to return a "soft fail" status code, indicating an instruction that is disassemblable, but invalid. Only used for ARM UNPREDICTABLE instructions at the moment.

Patch by James Molloy.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137830 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Owen Anderson 2011-08-17 17:44:15 +00:00
parent 0e6d230abd
commit 83e3f67fb6
12 changed files with 793 additions and 657 deletions

View File

@ -25,6 +25,34 @@ struct EDInstInfo;
/// and provides an array of assembly instructions.
class MCDisassembler {
public:
/// Ternary decode status. Most backends will just use Fail and
/// Success, however some have a concept of an instruction with
/// understandable semantics but which is architecturally
/// incorrect. An example of this is ARM UNPREDICTABLE instructions
/// which are disassemblable but cause undefined behaviour.
///
/// Because it makes sense to disassemble these instructions, there
/// is a "soft fail" failure mode that indicates the MCInst& is
/// valid but architecturally incorrect.
///
/// The enum numbers are deliberately chosen such that reduction
/// from Success->SoftFail ->Fail can be done with a simple
/// bitwise-AND:
///
/// LEFT & TOP = | Success Unpredictable Fail
/// --------------+-----------------------------------
/// Success | Success Unpredictable Fail
/// Unpredictable | Unpredictable Unpredictable Fail
/// Fail | Fail Fail Fail
///
/// An easy way of encoding this is as 0b11, 0b01, 0b00 for
/// Success, SoftFail, Fail respectively.
enum DecodeStatus {
Fail = 0,
SoftFail = 1,
Success = 3
};
/// Constructor - Performs initial setup for the disassembler.
MCDisassembler() : GetOpInfo(0), DisInfo(0), Ctx(0) {}
@ -41,8 +69,11 @@ public:
/// @param address - The address, in the memory space of region, of the first
/// byte of the instruction.
/// @param vStream - The stream to print warnings and diagnostic messages on.
/// @return - True if the instruction is valid; false otherwise.
virtual bool getInstruction(MCInst& instr,
/// @return - MCDisassembler::Success if the instruction is valid,
/// MCDisassembler::SoftFail if the instruction was
/// disassemblable but invalid,
/// MCDisassembler::Fail if the instruction was invalid.
virtual DecodeStatus getInstruction(MCInst& instr,
uint64_t& size,
const MemoryObject &region,
uint64_t address,

File diff suppressed because it is too large Load Diff

View File

@ -40,11 +40,11 @@ public:
}
/// getInstruction - See MCDisassembler.
bool getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream) const;
DecodeStatus getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream) const;
/// getEDInfo - See MCDisassembler.
EDInstInfo *getEDInfo() const;
@ -64,11 +64,11 @@ public:
}
/// getInstruction - See MCDisassembler.
bool getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream) const;
DecodeStatus getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream) const;
/// getEDInfo - See MCDisassembler.
EDInstInfo *getEDInfo() const;

View File

@ -493,7 +493,7 @@ EDInstInfo *MBlazeDisassembler::getEDInfo() const {
// Public interface for the disassembler
//
bool MBlazeDisassembler::getInstruction(MCInst &instr,
MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
@ -508,7 +508,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
// We want to read exactly 4 bytes of data.
if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4)
return false;
return Fail;
// Encoded as a big-endian 32-bit word in the stream.
insn = (bytes[0]<<24) | (bytes[1]<<16) | (bytes[2]<< 8) | (bytes[3]<<0);
@ -517,7 +517,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
// that it is a valid instruction.
unsigned opcode = getOPCODE(insn);
if (opcode == UNSUPPORTED)
return false;
return Fail;
instr.setOpcode(opcode);
@ -529,11 +529,11 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
uint64_t tsFlags = MBlazeInsts[opcode].TSFlags;
switch ((tsFlags & MBlazeII::FormMask)) {
default:
return false;
return Fail;
case MBlazeII::FRRRR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RB));
instr.addOperand(MCOperand::CreateReg(RA));
@ -541,7 +541,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRRR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateReg(RB));
@ -550,23 +550,23 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRI:
switch (opcode) {
default:
return false;
return Fail;
case MBlaze::MFS:
if (RD == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
break;
case MBlaze::MTS:
if (RA == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlaze::MSRSET:
case MBlaze::MSRCLR:
if (RD == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(insn&0x7FFF));
break;
@ -575,7 +575,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRRI:
if (RD == UNSUPPORTED || RA == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RA));
switch (opcode) {
@ -592,35 +592,35 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FCRR:
if (RA == UNSUPPORTED || RB == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FCRI:
if (RA == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
break;
case MBlazeII::FRCR:
if (RD == UNSUPPORTED || RB == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FRCI:
if (RD == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
break;
case MBlazeII::FCCR:
if (RB == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RB));
break;
@ -630,7 +630,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRRCI:
if (RD == UNSUPPORTED || RA == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateImm(getSHT(insn)));
@ -638,35 +638,35 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRRC:
if (RD == UNSUPPORTED || RA == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlazeII::FRCX:
if (RD == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
break;
case MBlazeII::FRCS:
if (RD == UNSUPPORTED || RS == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RS));
break;
case MBlazeII::FCRCS:
if (RS == UNSUPPORTED || RA == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RS));
instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlazeII::FCRCX:
if (RA == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
break;
@ -677,13 +677,13 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FCR:
if (RB == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FRIR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED)
return false;
return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
instr.addOperand(MCOperand::CreateReg(RA));
@ -693,7 +693,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
// We always consume 4 bytes of data on success
size = 4;
return true;
return Success;
}
static MCDisassembler *createMBlazeDisassembler(const Target &T) {

View File

@ -40,7 +40,7 @@ public:
}
/// getInstruction - See MCDisassembler.
bool getInstruction(MCInst &instr,
MCDisassembler::DecodeStatus getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,

View File

@ -106,11 +106,12 @@ static void logger(void* arg, const char* log) {
// Public interface for the disassembler
//
bool X86GenericDisassembler::getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream) const {
MCDisassembler::DecodeStatus
X86GenericDisassembler::getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream) const {
InternalInstruction internalInstr;
int ret = decodeInstruction(&internalInstr,
@ -123,11 +124,11 @@ bool X86GenericDisassembler::getInstruction(MCInst &instr,
if (ret) {
size = internalInstr.readerCursor - address;
return false;
return Fail;
}
else {
size = internalInstr.length;
return !translateInstruction(instr, internalInstr);
return (!translateInstruction(instr, internalInstr)) ? Success : Fail;
}
}

View File

@ -112,11 +112,11 @@ public:
~X86GenericDisassembler();
/// getInstruction - See MCDisassembler.
bool getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream) const;
DecodeStatus getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream) const;
/// getEDInfo - See MCDisassembler.
EDInstInfo *getEDInfo() const;

View File

@ -1,4 +1,4 @@
# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {potentially undefined instruction encoding}
# Opcode=140 Name=LDRB_POST Format=ARM_FORMAT_LDFRM(6)
# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0

View File

@ -65,15 +65,26 @@ static bool PrintInsts(const MCDisassembler &DisAsm,
for (Index = 0; Index < Bytes.size(); Index += Size) {
MCInst Inst;
if (DisAsm.getInstruction(Inst, Size, memoryObject, Index,
/*REMOVE*/ nulls())) {
Printer.printInst(&Inst, Out);
Out << "\n";
} else {
MCDisassembler::DecodeStatus S;
S = DisAsm.getInstruction(Inst, Size, memoryObject, Index,
/*REMOVE*/ nulls());
switch (S) {
case MCDisassembler::Fail:
SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
"invalid instruction encoding", "warning");
if (Size == 0)
Size = 1; // skip illegible bytes
break;
case MCDisassembler::SoftFail:
SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
"potentially undefined instruction encoding", "warning");
// Fall through
case MCDisassembler::Success:
Printer.printInst(&Inst, Out);
Out << "\n";
break;
}
}

View File

@ -128,5 +128,15 @@ void DisassemblerEmitter::run(raw_ostream &OS) {
return;
}
// ARM and Thumb have a CHECK() macro to deal with DecodeStatuses.
if (Target.getName() == "ARM" ||
Target.getName() == "Thumb") {
FixedLenDecoderEmitter(Records,
"CHECK(S, ", ");",
"S", "Fail",
"DecodeStatus S = Success;\n(void)S;").run(OS);
return;
}
FixedLenDecoderEmitter(Records).run(OS);
}

View File

@ -238,19 +238,24 @@ protected:
// Width of instructions
unsigned BitWidth;
// Parent emitter
const FixedLenDecoderEmitter *Emitter;
public:
FilterChooser(const FilterChooser &FC) :
AllInstructions(FC.AllInstructions), Opcodes(FC.Opcodes),
Operands(FC.Operands), Filters(FC.Filters),
FilterBitValues(FC.FilterBitValues), Parent(FC.Parent),
BestIndex(FC.BestIndex), BitWidth(FC.BitWidth) { }
BestIndex(FC.BestIndex), BitWidth(FC.BitWidth),
Emitter(FC.Emitter) { }
FilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
const std::vector<unsigned> &IDs,
std::map<unsigned, std::vector<OperandInfo> > &Ops,
unsigned BW) :
unsigned BW,
const FixedLenDecoderEmitter *E) :
AllInstructions(Insts), Opcodes(IDs), Operands(Ops), Filters(),
Parent(NULL), BestIndex(-1), BitWidth(BW) {
Parent(NULL), BestIndex(-1), BitWidth(BW), Emitter(E) {
for (unsigned i = 0; i < BitWidth; ++i)
FilterBitValues.push_back(BIT_UNFILTERED);
@ -264,7 +269,8 @@ public:
FilterChooser &parent) :
AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
Filters(), FilterBitValues(ParentFilterBitValues),
Parent(&parent), BestIndex(-1), BitWidth(parent.BitWidth) {
Parent(&parent), BestIndex(-1), BitWidth(parent.BitWidth),
Emitter(parent.Emitter) {
doFilter();
}
@ -563,17 +569,17 @@ unsigned Filter::usefulness() const {
void FilterChooser::emitTop(raw_ostream &o, unsigned Indentation,
std::string Namespace) {
o.indent(Indentation) <<
"static bool decode" << Namespace << "Instruction" << BitWidth
"static MCDisassembler::DecodeStatus decode" << Namespace << "Instruction" << BitWidth
<< "(MCInst &MI, uint" << BitWidth << "_t insn, uint64_t Address, "
<< "const void *Decoder) {\n";
o.indent(Indentation) << " unsigned tmp = 0;\n (void)tmp;\n";
o.indent(Indentation) << " unsigned tmp = 0;\n (void)tmp;\n" << Emitter->Locals << "\n";
++Indentation; ++Indentation;
// Emits code to decode the instructions.
emit(o, Indentation);
o << '\n';
o.indent(Indentation) << "return false;\n";
o.indent(Indentation) << "return " << Emitter->ReturnFail << ";\n";
--Indentation; --Indentation;
o.indent(Indentation) << "}\n";
@ -744,8 +750,8 @@ void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
}
if (Decoder != "")
o.indent(Indentation) << " if (!" << Decoder
<< "(MI, tmp, Address, Decoder)) return false;\n";
o.indent(Indentation) << " " << Emitter->GuardPrefix << Decoder
<< "(MI, tmp, Address, Decoder)" << Emitter->GuardPostfix << "\n";
else
o.indent(Indentation) << " MI.addOperand(MCOperand::CreateImm(tmp));\n";
@ -776,15 +782,15 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
// If a custom instruction decoder was specified, use that.
if (I->numFields() == 0 && I->Decoder.size()) {
o.indent(Indentation) << " if (!" << I->Decoder
<< "(MI, insn, Address, Decoder)) return false;\n";
o.indent(Indentation) << " " << Emitter->GuardPrefix << I->Decoder
<< "(MI, insn, Address, Decoder)" << Emitter->GuardPostfix << "\n";
break;
}
emitBinaryParser(o, Indentation, *I);
}
o.indent(Indentation) << " return true; // " << nameWithID(Opc)
o.indent(Indentation) << " return " << Emitter->ReturnOK << "; // " << nameWithID(Opc)
<< '\n';
o.indent(Indentation) << "}\n";
return true;
@ -821,14 +827,14 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
// If a custom instruction decoder was specified, use that.
if (I->numFields() == 0 && I->Decoder.size()) {
o.indent(Indentation) << " if (!" << I->Decoder
<< "(MI, insn, Address, Decoder)) return false;\n";
o.indent(Indentation) << " " << Emitter->GuardPrefix << I->Decoder
<< "(MI, insn, Address, Decoder)" << Emitter->GuardPostfix << "\n";
break;
}
emitBinaryParser(o, Indentation, *I);
}
o.indent(Indentation) << " return true; // " << nameWithID(Opc)
o.indent(Indentation) << " return " << Emitter->ReturnOK << "; // " << nameWithID(Opc)
<< '\n';
o.indent(Indentation) << "}\n";
@ -1426,7 +1432,7 @@ void FixedLenDecoderEmitter::run(raw_ostream &o)
// Emit the decoder for this namespace+width combination.
FilterChooser FC(NumberedInstructions, I->second, Operands,
8*I->first.second);
8*I->first.second, this);
FC.emitTop(o, 0, I->first.first);
}

View File

@ -49,9 +49,16 @@ struct OperandInfo {
class FixedLenDecoderEmitter : public TableGenBackend {
public:
FixedLenDecoderEmitter(RecordKeeper &R) :
FixedLenDecoderEmitter(RecordKeeper &R,
std::string GPrefix = "if (",
std::string GPostfix = " == MCDisassembler::Fail) return MCDisassembler::Fail;",
std::string ROK = "MCDisassembler::Success",
std::string RFail = "MCDisassembler::Fail",
std::string L = "") :
Records(R), Target(R),
NumberedInstructions(Target.getInstructionsByEnumValue()) {}
NumberedInstructions(Target.getInstructionsByEnumValue()),
GuardPrefix(GPrefix), GuardPostfix(GPostfix),
ReturnOK(ROK), ReturnFail(RFail), Locals(L) {}
// run - Output the code emitter
void run(raw_ostream &o);
@ -62,7 +69,10 @@ private:
std::vector<const CodeGenInstruction*> NumberedInstructions;
std::vector<unsigned> Opcodes;
std::map<unsigned, std::vector<OperandInfo> > Operands;
public:
std::string GuardPrefix, GuardPostfix;
std::string ReturnOK, ReturnFail;
std::string Locals;
};
} // end llvm namespace