From a21e2eae3def2fe39caed861dcb73c76c715569b Mon Sep 17 00:00:00 2001 From: Sean Callanan Date: Tue, 15 Mar 2011 01:23:15 +0000 Subject: [PATCH] X86 table-generator and disassembler support for the AVX instruction set. This code adds support for the VEX prefix and for the YMM registers accessible on AVX-enabled architectures. Instruction table support that enables AVX instructions for the disassembler is in an upcoming patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127644 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/Disassembler/X86Disassembler.cpp | 5 + .../X86/Disassembler/X86DisassemblerDecoder.c | 251 ++++++++++++++++-- .../X86/Disassembler/X86DisassemblerDecoder.h | 86 +++++- .../X86DisassemblerDecoderCommon.h | 21 +- utils/TableGen/X86DisassemblerTables.cpp | 61 ++++- utils/TableGen/X86RecognizableInstr.cpp | 225 ++++++++++++---- utils/TableGen/X86RecognizableInstr.h | 16 +- 7 files changed, 570 insertions(+), 95 deletions(-) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index f7777561b6a..d8a105e7e9d 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -409,6 +409,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_XMM32: case TYPE_XMM64: case TYPE_XMM128: + case TYPE_XMM256: case TYPE_DEBUGREG: case TYPE_CONTROLREG: return translateRMRegister(mcInst, insn); @@ -418,6 +419,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_M32: case TYPE_M64: case TYPE_M128: + case TYPE_M256: case TYPE_M512: case TYPE_Mv: case TYPE_M32FP: @@ -500,6 +502,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, case ENCODING_Rv: translateRegister(mcInst, insn.opcodeRegister); return false; + case ENCODING_VVVV: + translateRegister(mcInst, insn.vvvv); + return false; case ENCODING_DUP: return translateOperand(mcInst, 
insn.spec->operands[operand.type - TYPE_DUP0], diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index a9d28c965ac..06300a68686 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -368,29 +368,109 @@ static int readPrefixes(struct InternalInstruction* insn) { if (isPrefix) dbgprintf(insn, "Found prefix 0x%hhx", byte); } + + insn->vexSize = 0; - if (insn->mode == MODE_64BIT) { - if ((byte & 0xf0) == 0x40) { - uint8_t opcodeByte; + if (byte == 0xc4) { + uint8_t byte1; - if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { - dbgprintf(insn, "Redundant REX prefix"); - return -1; - } - - insn->rexPrefix = byte; - insn->necessaryPrefixLocation = insn->readerCursor - 2; - - dbgprintf(insn, "Found REX prefix 0x%hhx", byte); - } else { + if (lookAtByte(insn, &byte1)) { + dbgprintf(insn, "Couldn't read second byte of VEX"); + return -1; + } + + if (insn->mode == MODE_64BIT || byte1 & 0x8) { + insn->vexSize = 3; + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } - } else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; + + if (insn->vexSize == 3) { + insn->vexPrefix[0] = byte; + consumeByte(insn, &insn->vexPrefix[1]); + consumeByte(insn, &insn->vexPrefix[2]); + + /* We simulate the REX prefix for simplicity's sake */ + + insn->rexPrefix = 0x40 + | (wFromVEX3of3(insn->vexPrefix[2]) << 3) + | (rFromVEX2of3(insn->vexPrefix[1]) << 2) + | (xFromVEX2of3(insn->vexPrefix[1]) << 1) + | (bFromVEX2of3(insn->vexPrefix[1]) << 0); + + switch (ppFromVEX3of3(insn->vexPrefix[2])) + { + default: + break; + case VEX_PREFIX_66: + hasOpSize = TRUE; + break; + } + + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); + } } - + else if (byte 
== 0xc5) { + uint8_t byte1; + + if (lookAtByte(insn, &byte1)) { + dbgprintf(insn, "Couldn't read second byte of VEX"); + return -1; + } + + if (insn->mode == MODE_64BIT || byte1 & 0x8) { + insn->vexSize = 2; + } + else { + unconsumeByte(insn); + } + + if (insn->vexSize == 2) { + insn->vexPrefix[0] = byte; + consumeByte(insn, &insn->vexPrefix[1]); + + insn->rexPrefix = 0x40 + | (rFromVEX2of2(insn->vexPrefix[1]) << 2); + + switch (ppFromVEX2of2(insn->vexPrefix[1])) + { + default: + break; + case VEX_PREFIX_66: + hasOpSize = TRUE; + break; + } + + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); + } + } + else { + if (insn->mode == MODE_64BIT) { + if ((byte & 0xf0) == 0x40) { + uint8_t opcodeByte; + + if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { + dbgprintf(insn, "Redundant REX prefix"); + return -1; + } + + insn->rexPrefix = byte; + insn->necessaryPrefixLocation = insn->readerCursor - 2; + + dbgprintf(insn, "Found REX prefix 0x%hhx", byte); + } else { + unconsumeByte(insn); + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + } else { + unconsumeByte(insn); + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + } + if (insn->mode == MODE_16BIT) { insn->registerSize = (hasOpSize ? 4 : 2); insn->addressSize = (hasAdSize ? 
4 : 2); @@ -438,6 +518,39 @@ static int readOpcode(struct InternalInstruction* insn) { dbgprintf(insn, "readOpcode()"); insn->opcodeType = ONEBYTE; + + if (insn->vexSize == 3) + { + switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) + { + default: + dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); + return -1; + case 0: + break; + case VEX_LOB_0F: + insn->twoByteEscape = 0x0f; + insn->opcodeType = TWOBYTE; + return consumeByte(insn, &insn->opcode); + case VEX_LOB_0F38: + insn->twoByteEscape = 0x0f; + insn->threeByteEscape = 0x38; + insn->opcodeType = THREEBYTE_38; + return consumeByte(insn, &insn->opcode); + case VEX_LOB_0F3A: + insn->twoByteEscape = 0x0f; + insn->threeByteEscape = 0x3a; + insn->opcodeType = THREEBYTE_3A; + return consumeByte(insn, &insn->opcode); + } + } + else if (insn->vexSize == 2) + { + insn->twoByteEscape = 0x0f; + insn->opcodeType = TWOBYTE; + return consumeByte(insn, &insn->opcode); + } + if (consumeByte(insn, &current)) return -1; @@ -600,20 +713,64 @@ static int getID(struct InternalInstruction* insn) { dbgprintf(insn, "getID()"); attrMask = ATTR_NONE; - + if (insn->mode == MODE_64BIT) attrMask |= ATTR_64BIT; + + if (insn->vexSize) { + attrMask |= ATTR_VEX; + + if (insn->vexSize == 3) { + switch (ppFromVEX3of3(insn->vexPrefix[2])) { + case VEX_PREFIX_66: + attrMask |= ATTR_OPSIZE; + break; + case VEX_PREFIX_F3: + attrMask |= ATTR_XS; + break; + case VEX_PREFIX_F2: + attrMask |= ATTR_XD; + break; + } + + if (wFromVEX3of3(insn->vexPrefix[2])) + attrMask |= ATTR_REXW; + if (lFromVEX3of3(insn->vexPrefix[2])) + attrMask |= ATTR_VEXL; + } + else if (insn->vexSize == 2) { + switch (ppFromVEX2of2(insn->vexPrefix[1])) { + case VEX_PREFIX_66: + attrMask |= ATTR_OPSIZE; + break; + case VEX_PREFIX_F3: + attrMask |= ATTR_XS; + break; + case VEX_PREFIX_F2: + attrMask |= ATTR_XD; + break; + } + + if (lFromVEX2of2(insn->vexPrefix[1])) + attrMask |= ATTR_VEXL; + } + else { + return -1; + } + } + else { + if 
(insn->rexPrefix & 0x08) + attrMask |= ATTR_REXW; - if (insn->rexPrefix & 0x08) - attrMask |= ATTR_REXW; - - if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) - attrMask |= ATTR_OPSIZE; - else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) - attrMask |= ATTR_XS; - else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) - attrMask |= ATTR_XD; - + if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) + attrMask |= ATTR_OPSIZE; + else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) + attrMask |= ATTR_XS; + else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) + attrMask |= ATTR_XD; + + } + if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; @@ -1012,6 +1169,8 @@ static int readModRM(struct InternalInstruction* insn) { return prefix##_EAX + index; \ case TYPE_R64: \ return prefix##_RAX + index; \ + case TYPE_XMM256: \ + return prefix##_YMM0 + index; \ case TYPE_XMM128: \ case TYPE_XMM64: \ case TYPE_XMM32: \ @@ -1073,6 +1232,14 @@ static int fixupReg(struct InternalInstruction *insn, default: debug("Expected a REG or R/M encoding in fixupReg"); return -1; + case ENCODING_VVVV: + insn->vvvv = (Reg)fixupRegValue(insn, + (OperandType)op->type, + insn->vvvv, + &valid); + if (!valid) + return -1; + break; case ENCODING_REG: insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type, @@ -1236,6 +1403,27 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { return 0; } +/* + * readVVVV - Reads the VEX.vvvv register field out of the VEX prefix bytes + * already stored in insn->vexPrefix and saves it in insn->vvvv. + * + * @param insn - The instruction whose VEX prefix is to be examined. + * @return - 0 if a 2- or 3-byte VEX prefix was present; nonzero + * otherwise. 
+ */ +static int readVVVV(struct InternalInstruction* insn) { + dbgprintf(insn, "readVVVV()"); + + if (insn->vexSize == 3) + insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); + else if (insn->vexSize == 2) + insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); + else + return -1; + + return 0; +} + /* * readOperands - Consults the specifier for an instruction and consumes all * operands for that instruction, interpreting them as it goes. @@ -1317,6 +1505,13 @@ static int readOperands(struct InternalInstruction* insn) { case ENCODING_I: if (readOpcodeModifier(insn)) return -1; + break; + case ENCODING_VVVV: + if (readVVVV(insn)) + return -1; + if (fixupReg(insn, &insn->spec->operands[index])) + return -1; + break; case ENCODING_DUP: break; default: diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index d0dc8b56aea..d4a88d765d1 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -34,16 +34,30 @@ extern "C" { /* * Accessor functions for various fields of an Intel instruction */ -#define modFromModRM(modRM) ((modRM & 0xc0) >> 6) -#define regFromModRM(modRM) ((modRM & 0x38) >> 3) -#define rmFromModRM(modRM) (modRM & 0x7) -#define scaleFromSIB(sib) ((sib & 0xc0) >> 6) -#define indexFromSIB(sib) ((sib & 0x38) >> 3) -#define baseFromSIB(sib) (sib & 0x7) -#define wFromREX(rex) ((rex & 0x8) >> 3) -#define rFromREX(rex) ((rex & 0x4) >> 2) -#define xFromREX(rex) ((rex & 0x2) >> 1) -#define bFromREX(rex) (rex & 0x1) +#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6) +#define regFromModRM(modRM) (((modRM) & 0x38) >> 3) +#define rmFromModRM(modRM) ((modRM) & 0x7) +#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6) +#define indexFromSIB(sib) (((sib) & 0x38) >> 3) +#define baseFromSIB(sib) ((sib) & 0x7) +#define wFromREX(rex) (((rex) & 0x8) >> 3) +#define rFromREX(rex) (((rex) & 0x4) >> 2) +#define xFromREX(rex) (((rex) & 0x2) >> 1) 
+#define bFromREX(rex) ((rex) & 0x1) + +#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) +#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) +#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) +#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f) +#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7) +#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3) +#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2) +#define ppFromVEX3of3(vex) ((vex) & 0x3) + +#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7) +#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3) +#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2) +#define ppFromVEX2of2(vex) ((vex) & 0x3) /* * These enums represent Intel registers for use by the decoder. @@ -206,7 +220,25 @@ extern "C" { ENTRY(XMM13) \ ENTRY(XMM14) \ ENTRY(XMM15) - + +#define REGS_YMM \ + ENTRY(YMM0) \ + ENTRY(YMM1) \ + ENTRY(YMM2) \ + ENTRY(YMM3) \ + ENTRY(YMM4) \ + ENTRY(YMM5) \ + ENTRY(YMM6) \ + ENTRY(YMM7) \ + ENTRY(YMM8) \ + ENTRY(YMM9) \ + ENTRY(YMM10) \ + ENTRY(YMM11) \ + ENTRY(YMM12) \ + ENTRY(YMM13) \ + ENTRY(YMM14) \ + ENTRY(YMM15) + #define REGS_SEGMENT \ ENTRY(ES) \ ENTRY(CS) \ @@ -252,6 +284,7 @@ extern "C" { REGS_64BIT \ REGS_MMX \ REGS_XMM \ + REGS_YMM \ REGS_SEGMENT \ REGS_DEBUG \ REGS_CONTROL \ @@ -332,6 +365,27 @@ typedef enum { SEG_OVERRIDE_GS, SEG_OVERRIDE_max } SegmentOverride; + +/* + * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field + */ + +typedef enum { + VEX_LOB_0F = 0x1, + VEX_LOB_0F38 = 0x2, + VEX_LOB_0F3A = 0x3 +} VEXLeadingOpcodeByte; + +/* + * VEXPrefixCode - Possible values for the VEX.pp field + */ + +typedef enum { + VEX_PREFIX_NONE = 0x0, + VEX_PREFIX_66 = 0x1, + VEX_PREFIX_F3 = 0x2, + VEX_PREFIX_F2 = 0x3 +} VEXPrefixCode; typedef uint8_t BOOL; @@ -389,10 +443,12 @@ struct InternalInstruction { uint8_t prefixPresent[0x100]; /* contains the location (for use with the reader) of the prefix byte */ uint64_t prefixLocations[0x100]; + /* The value of the VEX prefix, if present */ + uint8_t vexPrefix[3]; + /* The 
length of the VEX prefix (0 if not present) */ + uint8_t vexSize; /* The value of the REX prefix, if present */ uint8_t rexPrefix; - /* The location of the REX prefix */ - uint64_t rexLocation; /* The location where a mandatory prefix would have to be (i.e., right before the opcode, or right before the REX prefix if one is present) */ uint64_t necessaryPrefixLocation; @@ -428,6 +484,10 @@ struct InternalInstruction { /* state for additional bytes, consumed during operand decode. Pattern: consumed___ indicates that the byte was already consumed and does not need to be consumed again */ + + /* The VEX.vvvv field, which contains a third register operand for some AVX + instructions */ + Reg vvvv; /* The ModR/M byte, which contains most register operands and some portion of all memory operands */ diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 1425b86ba53..bc35be80c83 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -49,7 +49,9 @@ ENUM_ENTRY(ATTR_XS, 0x02) \ ENUM_ENTRY(ATTR_XD, 0x04) \ ENUM_ENTRY(ATTR_REXW, 0x08) \ - ENUM_ENTRY(ATTR_OPSIZE, 0x10) + ENUM_ENTRY(ATTR_OPSIZE, 0x10) \ + ENUM_ENTRY(ATTR_VEX, 0x20) \ + ENUM_ENTRY(ATTR_VEXL, 0x40) #define ENUM_ENTRY(n, v) n = v, enum attributeBits { @@ -87,7 +89,20 @@ enum attributeBits { "IC_64BIT_REXW_XS") \ ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! 
Prefer over all " \ "else because this changes most " \ - "operands' meaning") + "operands' meaning") \ + ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \ + ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \ + ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \ + ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \ + ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \ + ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \ + ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \ + ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \ + ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ + ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ + ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XS prefix")\ + ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") + #define ENUM_ENTRY(n, r, d) n, typedef enum { @@ -183,6 +198,7 @@ struct ContextDecision { ENUM_ENTRY(ENCODING_NONE, "") \ ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \ ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \ + ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \ ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \ ENUM_ENTRY(ENCODING_CW, "2-byte") \ ENUM_ENTRY(ENCODING_CD, "4-byte") \ @@ -278,6 +294,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \ ENUM_ENTRY(TYPE_XMM64, "8-byte") \ ENUM_ENTRY(TYPE_XMM128, "16-byte") \ + ENUM_ENTRY(TYPE_XMM256, "32-byte") \ ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp index 94797f55f71..081b5771cdb 100644 --- a/utils/TableGen/X86DisassemblerTables.cpp +++ b/utils/TableGen/X86DisassemblerTables.cpp @@ -46,9 +46,11 @@ static inline bool 
inheritsFrom(InstructionContext child, case IC_OPSIZE: return(inheritsFrom(child, IC_64BIT_OPSIZE)); case IC_XD: - return(inheritsFrom(child, IC_64BIT_XD)); + return(inheritsFrom(child, IC_64BIT_XD) || + inheritsFrom(child, IC_VEX_XD)); case IC_XS: - return(inheritsFrom(child, IC_64BIT_XS)); + return(inheritsFrom(child, IC_64BIT_XS) || + inheritsFrom(child, IC_VEX_XS)); case IC_64BIT_REXW: return(inheritsFrom(child, IC_64BIT_REXW_XS) || inheritsFrom(child, IC_64BIT_REXW_XD) || @@ -65,6 +67,35 @@ static inline bool inheritsFrom(InstructionContext child, return false; case IC_64BIT_REXW_OPSIZE: return false; + case IC_VEX: + return(inheritsFrom(child, IC_VEX_XS) || + inheritsFrom(child, IC_VEX_XD) || + inheritsFrom(child, IC_VEX_L) || + inheritsFrom(child, IC_VEX_W) || + inheritsFrom(child, IC_VEX_OPSIZE)); + case IC_VEX_XS: + return(inheritsFrom(child, IC_VEX_L_XS) || + inheritsFrom(child, IC_VEX_W_XS)); + case IC_VEX_XD: + return(inheritsFrom(child, IC_VEX_L_XD) || + inheritsFrom(child, IC_VEX_W_XD)); + case IC_VEX_L: + return(inheritsFrom(child, IC_VEX_L_XS) || + inheritsFrom(child, IC_VEX_L_XD)); + case IC_VEX_L_XS: + return false; + case IC_VEX_L_XD: + return false; + case IC_VEX_W: + return(inheritsFrom(child, IC_VEX_W_XS) || + inheritsFrom(child, IC_VEX_W_XD) || + inheritsFrom(child, IC_VEX_W_OPSIZE)); + case IC_VEX_W_XS: + return false; + case IC_VEX_W_XD: + return false; + case IC_VEX_OPSIZE: + return inheritsFrom(child, IC_VEX_W_OPSIZE); default: return false; } @@ -461,7 +492,29 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const { for (index = 0; index < 256; ++index) { o.indent(i * 2); - if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS)) + if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE)) + o << "IC_VEX_L_OPSIZE"; + else if ((index & ATTR_VEXL) && (index & ATTR_XD)) + o << "IC_VEX_L_XD"; + else if ((index & ATTR_VEXL) && (index & ATTR_XS)) + o << "IC_VEX_L_XS"; + else if ((index & ATTR_VEX) && (index & 
ATTR_REXW) && (index & ATTR_OPSIZE)) + o << "IC_VEX_W_OPSIZE"; + else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XD)) + o << "IC_VEX_W_XD"; + else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XS)) + o << "IC_VEX_W_XS"; + else if (index & ATTR_VEXL) + o << "IC_VEX_L"; + else if ((index & ATTR_VEX) && (index & ATTR_REXW)) + o << "IC_VEX_W"; + else if ((index & ATTR_VEX) && (index & ATTR_OPSIZE)) + o << "IC_VEX_OPSIZE"; + else if ((index & ATTR_VEX) && (index & ATTR_XD)) + o << "IC_VEX_XD"; + else if ((index & ATTR_VEX) && (index & ATTR_XS)) + o << "IC_VEX_XS"; + else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS)) o << "IC_64BIT_REXW_XS"; else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD)) o << "IC_64BIT_REXW_XD"; @@ -484,6 +537,8 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const { o << "IC_XD"; else if (index & ATTR_OPSIZE) o << "IC_OPSIZE"; + else if (index & ATTR_VEX) + o << "IC_VEX"; else o << "IC"; diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index b0839c33982..805cae7b3e6 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -214,7 +214,9 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, HasOpSizePrefix = Rec->getValueAsBit("hasOpSizePrefix"); HasREX_WPrefix = Rec->getValueAsBit("hasREX_WPrefix"); + HasVEXPrefix = Rec->getValueAsBit("hasVEXPrefix"); HasVEX_4VPrefix = Rec->getValueAsBit("hasVEX_4VPrefix"); + HasVEX_WPrefix = Rec->getValueAsBit("hasVEX_WPrefix"); HasLockPrefix = Rec->getValueAsBit("hasLockPrefix"); IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly"); @@ -224,7 +226,8 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, Operands = &insn.Operands.OperandList; IsSSE = HasOpSizePrefix && (Name.find("16") == Name.npos); - HasFROperands = false; + HasFROperands = hasFROperands(); + HasVEX_LPrefix = 
has256BitOperands() || Rec->getValueAsBit("hasVEX_L"); ShouldBeEmitted = true; } @@ -248,7 +251,32 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables, InstructionContext RecognizableInstr::insnContext() const { InstructionContext insnContext; - if (Name.find("64") != Name.npos || HasREX_WPrefix) { + if (HasVEX_4VPrefix || HasVEXPrefix) { + if (HasOpSizePrefix && HasVEX_LPrefix) + insnContext = IC_VEX_L_OPSIZE; + else if (HasOpSizePrefix && HasVEX_WPrefix) + insnContext = IC_VEX_W_OPSIZE; + else if (HasOpSizePrefix) + insnContext = IC_VEX_OPSIZE; + else if (HasVEX_LPrefix && Prefix == X86Local::XS) + insnContext = IC_VEX_L_XS; + else if (HasVEX_LPrefix && Prefix == X86Local::XD) + insnContext = IC_VEX_L_XD; + else if (HasVEX_WPrefix && Prefix == X86Local::XS) + insnContext = IC_VEX_W_XS; + else if (HasVEX_WPrefix && Prefix == X86Local::XD) + insnContext = IC_VEX_W_XD; + else if (HasVEX_WPrefix) + insnContext = IC_VEX_W; + else if (HasVEX_LPrefix) + insnContext = IC_VEX_L; + else if (Prefix == X86Local::XD) + insnContext = IC_VEX_XD; + else if (Prefix == X86Local::XS) + insnContext = IC_VEX_XS; + else + insnContext = IC_VEX; + } else if (Name.find("64") != Name.npos || HasREX_WPrefix) { if (HasREX_WPrefix && HasOpSizePrefix) insnContext = IC_64BIT_REXW_OPSIZE; else if (HasOpSizePrefix) @@ -280,6 +308,10 @@ InstructionContext RecognizableInstr::insnContext() const { } RecognizableInstr::filter_ret RecognizableInstr::filter() const { + /////////////////// + // FILTER_STRONG + // + // Filter out intrinsics if (!Rec->isSubClassOf("X86Inst")) @@ -291,26 +323,71 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const { if (Form == X86Local::MRMInitReg) return FILTER_STRONG; + + + // TEMPORARY pending bug fixes - + if (Name.find("VMOVDQU") != Name.npos || + Name.find("VMOVDQA") != Name.npos || + Name.find("VROUND") != Name.npos) + return FILTER_STRONG; + + // Filter out artificial instructions + + if (Name.find("TAILJMP") != Name.npos || + 
Name.find("_Int") != Name.npos || + Name.find("_int") != Name.npos || + Name.find("Int_") != Name.npos || + Name.find("_NOREX") != Name.npos || + Name.find("_TC") != Name.npos || + Name.find("EH_RETURN") != Name.npos || + Name.find("V_SET") != Name.npos || + Name.find("LOCK_") != Name.npos || + Name.find("WIN") != Name.npos || + Name.find("_AVX") != Name.npos || + Name.find("2SDL") != Name.npos) + return FILTER_STRONG; + + // Filter out instructions with segment override prefixes. + // They're too messy to handle now and we'll special case them if needed. + + if (SegOvr) + return FILTER_STRONG; + + // Filter out instructions that can't be printed. + + if (AsmString.size() == 0) + return FILTER_STRONG; + + // Filter out instructions with subreg operands. + + if (AsmString.find("subreg") != AsmString.npos) + return FILTER_STRONG; + + ///////////////// + // FILTER_WEAK + // + + // Filter out instructions with a LOCK prefix; // prefer forms that do not have the prefix if (HasLockPrefix) return FILTER_WEAK; - - // Filter out artificial instructions - if (Name.find("TAILJMP") != Name.npos || - Name.find("_Int") != Name.npos || - Name.find("_int") != Name.npos || - Name.find("Int_") != Name.npos || - Name.find("_NOREX") != Name.npos || - Name.find("_TC") != Name.npos || - Name.find("EH_RETURN") != Name.npos || - Name.find("V_SET") != Name.npos || - Name.find("LOCK_") != Name.npos || - Name.find("WIN") != Name.npos) - return FILTER_STRONG; + // Filter out alternate forms of AVX instructions + if (Name.find("_alt") != Name.npos || + Name.find("XrYr") != Name.npos || + Name.find("r64r") != Name.npos || + Name.find("_64mr") != Name.npos || + Name.find("Xrr") != Name.npos || + Name.find("rr64") != Name.npos) + return FILTER_WEAK; + + if (Name == "VMASKMOVDQU64" || + Name == "VEXTRACTPSrr64" || + Name == "VMOVQd64rr" || + Name == "VMOVQs64rr") + return FILTER_WEAK; // Special cases. 
@@ -339,6 +416,7 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const { Name == "PUSH32i16" || Name == "PUSH64i16" || Name == "MOVPQI2QImr" || + Name == "VMOVPQI2QImr" || Name == "MOVSDmr" || Name == "MOVSDrm" || Name == "MOVSSmr" || @@ -349,22 +427,6 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const { Name == "CRC32r16") return FILTER_WEAK; - // Filter out instructions with segment override prefixes. - // They're too messy to handle now and we'll special case them if needed. - - if (SegOvr) - return FILTER_STRONG; - - // Filter out instructions that can't be printed. - - if (AsmString.size() == 0) - return FILTER_STRONG; - - // Filter out instructions with subreg operands. - - if (AsmString.find("subreg") != AsmString.npos) - return FILTER_STRONG; - if (HasFROperands && Name.find("MOV") != Name.npos && ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) || (Name.find("to") != Name.npos))) @@ -372,6 +434,33 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const { return FILTER_NORMAL; } + +bool RecognizableInstr::hasFROperands() const { + const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands; + unsigned numOperands = OperandList.size(); + + for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) { + const std::string &recName = OperandList[operandIndex].Rec->getName(); + + if (recName.find("FR") != recName.npos) + return true; + } + return false; +} + +bool RecognizableInstr::has256BitOperands() const { + const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands; + unsigned numOperands = OperandList.size(); + + for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) { + const std::string &recName = OperandList[operandIndex].Rec->getName(); + + if (!recName.compare("VR256") || !recName.compare("f256mem")) { + return true; + } + } + return false; +} void RecognizableInstr::handleOperand( bool optional, @@ -395,13 +484,13 @@ void RecognizableInstr::handleOperand( } const std::string &typeName = 
(*Operands)[operandIndex].Rec->getName(); - + Spec->operands[operandIndex].encoding = encodingFromString(typeName, HasOpSizePrefix); Spec->operands[operandIndex].type = typeFromString(typeName, - IsSSE, - HasREX_WPrefix, - HasOpSizePrefix); + IsSSE, + HasREX_WPrefix, + HasOpSizePrefix); ++operandIndex; ++physicalOperandIndex; @@ -530,31 +619,45 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { case X86Local::MRMSrcReg: // Operand 1 is a register operand in the Reg/Opcode field. // Operand 2 is a register operand in the R/M field. + // - In AVX, there is a register operand in the VEX.vvvv field here - // Operand 3 (optional) is an immediate. - assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 && - "Unexpected number of operands for MRMSrcRegFrm"); - HANDLE_OPERAND(roRegister) - HANDLE_OPERAND(rmRegister) + if (HasVEX_4VPrefix) + assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 4 && + "Unexpected number of operands for MRMSrcRegFrm with VEX_4V"); + else + assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 && + "Unexpected number of operands for MRMSrcRegFrm"); + + HANDLE_OPERAND(roRegister) + if (HasVEX_4VPrefix) // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field - HANDLE_OPTIONAL(rmRegister) - else - HANDLE_OPTIONAL(immediate) + HANDLE_OPERAND(vvvvRegister) + + HANDLE_OPERAND(rmRegister) + HANDLE_OPTIONAL(immediate) break; case X86Local::MRMSrcMem: // Operand 1 is a register operand in the Reg/Opcode field. // Operand 2 is a memory operand (possibly SIB-extended) + // - In AVX, there is a register operand in the VEX.vvvv field here - // Operand 3 (optional) is an immediate. 
- assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 && - "Unexpected number of operands for MRMSrcMemFrm"); + + if (HasVEX_4VPrefix) + assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 4 && + "Unexpected number of operands for MRMSrcMemFrm with VEX_4V"); + else + assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 && + "Unexpected number of operands for MRMSrcMemFrm"); + HANDLE_OPERAND(roRegister) if (HasVEX_4VPrefix) // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field - HANDLE_OPTIONAL(rmRegister) + HANDLE_OPERAND(vvvvRegister) HANDLE_OPERAND(memory) HANDLE_OPTIONAL(immediate) @@ -569,8 +672,14 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { case X86Local::MRM7r: // Operand 1 is a register operand in the R/M field. // Operand 2 (optional) is an immediate or relocation. - assert(numPhysicalOperands <= 2 && - "Unexpected number of operands for MRMnRFrm"); + if (HasVEX_4VPrefix) + assert(numPhysicalOperands <= 3 && + "Unexpected number of operands for MRMSrcMemFrm with VEX_4V"); + else + assert(numPhysicalOperands <= 2 && + "Unexpected number of operands for MRMnRFrm"); + if (HasVEX_4VPrefix) + HANDLE_OPERAND(vvvvRegister); HANDLE_OPTIONAL(rmRegister) HANDLE_OPTIONAL(relocation) break; @@ -854,6 +963,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, TYPE("ssmem", TYPE_M32FP) TYPE("RST", TYPE_ST) TYPE("i128mem", TYPE_M128) + TYPE("i256mem", TYPE_M256) TYPE("i64i32imm_pcrel", TYPE_REL64) TYPE("i16imm_pcrel", TYPE_REL16) TYPE("i32imm_pcrel", TYPE_REL32) @@ -878,6 +988,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, TYPE("offset16", TYPE_MOFFS16) TYPE("offset32", TYPE_MOFFS32) TYPE("offset64", TYPE_MOFFS64) + TYPE("VR256", TYPE_XMM256) errs() << "Unhandled type string " << s << "\n"; llvm_unreachable("Unhandled type string"); } @@ -900,6 +1011,10 @@ OperandEncoding 
RecognizableInstr::immediateEncodingFromString ENCODING("i64i32imm", ENCODING_ID) ENCODING("i64i8imm", ENCODING_IB) ENCODING("i8imm", ENCODING_IB) + // This is not a typo. Instructions like BLENDVPD put + // register IDs in 8-bit immediates nowadays. + ENCODING("VR256", ENCODING_IB) + ENCODING("VR128", ENCODING_IB) errs() << "Unhandled immediate encoding " << s << "\n"; llvm_unreachable("Unhandled immediate encoding"); } @@ -915,6 +1030,7 @@ OperandEncoding RecognizableInstr::rmRegisterEncodingFromString ENCODING("FR64", ENCODING_RM) ENCODING("FR32", ENCODING_RM) ENCODING("VR64", ENCODING_RM) + ENCODING("VR256", ENCODING_RM) errs() << "Unhandled R/M register encoding " << s << "\n"; llvm_unreachable("Unhandled R/M register encoding"); } @@ -933,10 +1049,22 @@ OperandEncoding RecognizableInstr::roRegisterEncodingFromString ENCODING("SEGMENT_REG", ENCODING_REG) ENCODING("DEBUG_REG", ENCODING_REG) ENCODING("CONTROL_REG", ENCODING_REG) + ENCODING("VR256", ENCODING_REG) errs() << "Unhandled reg/opcode register encoding " << s << "\n"; llvm_unreachable("Unhandled reg/opcode register encoding"); } +OperandEncoding RecognizableInstr::vvvvRegisterEncodingFromString + (const std::string &s, + bool hasOpSizePrefix) { + ENCODING("FR32", ENCODING_VVVV) + ENCODING("FR64", ENCODING_VVVV) + ENCODING("VR128", ENCODING_VVVV) + ENCODING("VR256", ENCODING_VVVV) + errs() << "Unhandled VEX.vvvv register encoding " << s << "\n"; + llvm_unreachable("Unhandled VEX.vvvv register encoding"); +} + OperandEncoding RecognizableInstr::memoryEncodingFromString (const std::string &s, bool hasOpSizePrefix) { @@ -951,6 +1079,7 @@ OperandEncoding RecognizableInstr::memoryEncodingFromString ENCODING("f64mem", ENCODING_RM) ENCODING("f32mem", ENCODING_RM) ENCODING("i128mem", ENCODING_RM) + ENCODING("i256mem", ENCODING_RM) ENCODING("f80mem", ENCODING_RM) ENCODING("lea32mem", ENCODING_RM) ENCODING("lea64_32mem", ENCODING_RM) diff --git a/utils/TableGen/X86RecognizableInstr.h 
b/utils/TableGen/X86RecognizableInstr.h index c043b909b42..c7ec18ca6db 100644 --- a/utils/TableGen/X86RecognizableInstr.h +++ b/utils/TableGen/X86RecognizableInstr.h @@ -52,8 +52,14 @@ private: bool HasOpSizePrefix; /// The hasREX_WPrefix field from the record bool HasREX_WPrefix; + /// The hasVEXPrefix field from the record + bool HasVEXPrefix; /// The hasVEX_4VPrefix field from the record bool HasVEX_4VPrefix; + /// The hasVEX_WPrefix field from the record + bool HasVEX_WPrefix; + /// Inferred from the operands; indicates whether the L bit in the VEX prefix is set + bool HasVEX_LPrefix; /// The hasLockPrefix field from the record bool HasLockPrefix; /// The isCodeGenOnly filed from the record @@ -96,7 +102,7 @@ private: // error if it conflcits with any other FILTER_NORMAL // instruction }; - + /// filter - Determines whether the instruction should be decodable. Some /// instructions are pure intrinsics and use unencodable operands; many /// synthetic instructions are duplicates of other instructions; other @@ -106,6 +112,12 @@ private: /// /// @return - The degree of filtering to be applied (see filter_ret). filter_ret filter() const; + + /// hasFROperands - Returns true if any operand is a FR operand. + bool hasFROperands() const; + + /// has256BitOperands - Returns true if any operand is a 256-bit SSE operand. + bool has256BitOperands() const; /// typeFromString - Translates an operand type from the string provided in /// the LLVM tables to an OperandType for use in the operand specifier. @@ -155,6 +167,8 @@ private: bool hasOpSizePrefix); static OperandEncoding opcodeModifierEncodingFromString(const std::string &s, bool hasOpSizePrefix); + static OperandEncoding vvvvRegisterEncodingFromString(const std::string &s, + bool HasOpSizePrefix); /// handleOperand - Converts a single operand from the LLVM table format to /// the emitted table format, handling any duplicate operands it encounters