- Add AVX SSE2 Move doubleword and quadword instructions.

- Add encode bits for VEX_W
- All 128-bit SSE1 & SSE2 instructions that are described
  in the .td file now have a working AVX-encoded form.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107365 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2010-07-01 01:20:06 +00:00
parent 2e5238ffa0
commit 6596a62076
6 changed files with 203 additions and 17 deletions

View File

@ -102,6 +102,7 @@ class T8 { bits<4> Prefix = 13; }
class TA { bits<4> Prefix = 14; }
class TF { bits<4> Prefix = 15; }
class VEX { bit hasVEXPrefix = 1; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
@ -130,8 +131,9 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
bits<2> SegOvrBits = 0; // Segment override prefix.
Domain ExeDomain = d;
bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@ -146,7 +148,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{23-22} = ExeDomain.Value;
let TSFlags{31-24} = Opcode;
let TSFlags{32} = hasVEXPrefix;
let TSFlags{33} = hasVEX_4VPrefix;
let TSFlags{33} = hasVEX_WPrefix;
let TSFlags{34} = hasVEX_4VPrefix;
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,

View File

@ -429,9 +429,9 @@ namespace X86II {
// address instructions in SSE are represented as 3 address ones in AVX
// and the additional register is encoded in VEX_VVVV prefix.
//
VEXShift = 0,
VEX = 1 << VEXShift,
VEX_4V = 2 << VEXShift
VEX = 1,
VEX_W = 1 << 1,
VEX_4V = 1 << 2
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the

View File

@ -2888,6 +2888,18 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
// SSE2 - Move Doubleword
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Packed Double Int
// AVX counterparts of MOVDI2PDIrr/MOVDI2PDIrm (same opcode 0x6E, same
// operands and patterns): move a 32-bit value from a GR32 register or an
// i32mem operand into a VR128 register, matched as
// (v4i32 (scalar_to_vector ...)). The trailing VEX mixin requests the VEX
// prefix.
// NOTE(review): the asm strings say "movd"; the VPDI class presumably
// prepends the "v" of the AVX mnemonic — confirm against the VPDI definition.
let isAsmParserOnly = 1 in {
def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector GR32:$src)))]>, VEX;
def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
VEX;
}
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@ -2897,6 +2909,18 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
// Move Int Doubleword to Single Scalar
// AVX counterparts of MOVDI2SSrr/MOVDI2SSrm (opcode 0x6E): bit-convert a
// 32-bit integer, taken from a GR32 register or loaded from i32mem, into an
// FR32 scalar register. Both defs carry the VEX mixin.
let isAsmParserOnly = 1 in {
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
VEX;
}
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>;
@ -2905,6 +2929,18 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
// Move Packed Doubleword Int to Doubleword Int
// AVX counterparts of MOVPDI2DIrr/MOVPDI2DImr (opcode 0x7E): extract element
// 0 of a v4i32 held in a VR128 register and either write it to a GR32
// register (rr form) or store it to an i32mem location (mr form, which has
// no register outputs). Both defs carry the VEX mixin.
let isAsmParserOnly = 1 in {
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
(iPTR 0)))]>, VEX;
def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>, VEX;
}
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@ -2914,6 +2950,15 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>;
// Move Scalar Single to Double Int
// AVX counterparts of MOVSS2DIrr/MOVSS2DImr (opcode 0x7E): bit-convert an
// FR32 scalar into a 32-bit integer, either written to a GR32 register
// (rr form) or stored to an i32mem location (mr form). Both defs carry the
// VEX mixin.
let isAsmParserOnly = 1 in {
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
}
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>;
@ -2922,19 +2967,37 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
// movd / movq to XMM register zero-extends
// AVX forms of the GPR-to-XMM moves matched through X86vzmovl (opcode 0x6E),
// defined with AddedComplexity = 15.
//  - VMOVZDI2PDIrr takes a GR32 source and produces a v4i32.
//  - VMOVZQI2PQIrr takes a GR64 source and produces a v2i64; it additionally
//    carries VEX_W so that the W bit is set in the VEX prefix.
let AddedComplexity = 15, isAsmParserOnly = 1 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector GR32:$src)))))]>,
VEX;
def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
[(set VR128:$dst, (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector GR64:$src)))))]>,
VEX, VEX_W;
}
let AddedComplexity = 15 in {
def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector GR32:$src)))))]>;
// This is X86-64 only.
def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
"mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
[(set VR128:$dst, (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector GR64:$src)))))]>;
}
let AddedComplexity = 20 in {
let isAsmParserOnly = 1 in
def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
(loadi32 addr:$src))))))]>,
VEX;
def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@ -2953,12 +3016,25 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//
// SSE2 instructions with XS prefix
// Move Quadword Int to Packed Quadword Int
// AVX counterpart of MOVQI2PQIrm (opcode 0x7E with the XS prefix): load a
// 64-bit integer from i64mem into a VR128 register as
// (v2i64 (scalar_to_vector ...)). This def derives from the plain I class
// rather than VPDI, so the "vmovq" mnemonic and the HasAVX/HasSSE2
// predicates are spelled out explicitly here.
let isAsmParserOnly = 1 in
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
VEX, Requires<[HasAVX, HasSSE2]>;
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
Requires<[HasSSE2]>;
Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
// Move Packed Quadword Int to Quadword Int
// AVX counterpart of MOVPQI2QImr (opcode 0xD6): store element 0 of a v2i64
// held in a VR128 register to an i64mem location. Carries the VEX mixin.
let isAsmParserOnly = 1 in
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)]>, VEX;
def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@ -2968,17 +3044,29 @@ def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
(f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
// Store / copy lower 64-bits of a XMM register.
// AVX counterpart of MOVLQ128mr (opcode 0xD6): same opcode and operands as
// the vector_extract-based store form, but matched through the
// int_x86_sse2_storel_dq intrinsic instead. Carries the VEX mixin.
let isAsmParserOnly = 1 in
def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
// AVX counterpart of MOVZQI2PQIrm (opcode 0x7E with the XS prefix): load a
// 64-bit integer from i64mem and produce
// (v2i64 (X86vzmovl (scalar_to_vector ...))) in a VR128 register.
// Derives from the plain I class, so the "vmovq" mnemonic and the
// HasAVX/HasSSE2 predicates are written explicitly.
let AddedComplexity = 20, isAsmParserOnly = 1 in
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))]>,
XS, VEX, Requires<[HasAVX, HasSSE2]>;
let AddedComplexity = 20 in {
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))]>, XS,
Requires<[HasSSE2]>;
(loadi64 addr:$src))))))]>,
XS, Requires<[HasSSE2]>;
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVZQI2PQIrm addr:$src)>;
@ -2989,12 +3077,23 @@ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
// Moving from XMM to XMM and clear upper 64 bits. Note: there is a bug in the
// IA-32 documentation; movq xmm1, xmm2 does clear the high bits.
// AVX counterpart of MOVZPQILo2PQIrr (opcode 0x7E with the XS prefix):
// register-to-register form matched as (v2i64 (X86vzmovl VR128:$src)).
// Derives from the plain I class, so the "vmovq" mnemonic and the
// HasAVX/HasSSE2 predicates are written explicitly.
let isAsmParserOnly = 1, AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
XS, VEX, Requires<[HasAVX, HasSSE2]>;
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
XS, Requires<[HasSSE2]>;
// AVX counterpart of MOVZPQILo2PQIrm (opcode 0x7E with the XS prefix):
// memory form matched as (v2i64 (X86vzmovl (loadv2i64 addr:$src))). The
// source operand is declared as i128mem because the pattern folds a full
// v2i64 load.
let AddedComplexity = 20, isAsmParserOnly = 1 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl
(loadv2i64 addr:$src))))]>,
XS, VEX, Requires<[HasAVX, HasSSE2]>;
let AddedComplexity = 20 in {
def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movq\t{$src, $dst|$dst, $src}",
@ -3006,10 +3105,22 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
(MOVZPQILo2PQIrm addr:$src)>;
}
// Instructions to match in the assembler
// Pattern-less AVX defs (empty pattern lists) between a 64-bit GPR and an
// XMM register; both carry VEX and VEX_W.
//  - VMOVQs64rr: GR64 -> VR128, opcode 0x6E, MRMSrcReg.
//  - VMOVQd64rr: VR128 -> GR64, opcode 0x7E, MRMDestReg.
let isAsmParserOnly = 1 in {
// This instruction is in fact an alias to movd with a 64-bit dst
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
}
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
// AVX form of MOVQxrxr: XMM-to-XMM vmovq (opcode 0x7E with the XS prefix)
// with an empty pattern list. The brace-less `let` applies to this def only.
let isAsmParserOnly = 1 in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>, XS;

View File

@ -449,6 +449,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if (TSFlags & X86II::OpSize)
VEX_PP = 0x01;
if ((TSFlags >> 32) & X86II::VEX_W)
VEX_W = 1;
switch (TSFlags & X86II::Op0Mask) {
default: assert(0 && "Invalid prefix!");
case X86II::T8: // 0F 38
@ -508,15 +511,20 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_X = 0x0;
}
break;
default: // MRM0r-MRM7r
default: // MRMDestReg, MRM0r-MRM7r
if (MI.getOperand(CurOp).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0;
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp);
CurOp++;
for (; CurOp != NumOps; ++CurOp) {
const MCOperand &MO = MI.getOperand(CurOp);
if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
VEX_B = 0x0;
if (MO.isReg() && !HasVEX_4V &&
X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
VEX_R = 0x0;
}
break;
assert(0 && "Not implemented!");
@ -535,7 +543,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
//
unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
if (VEX_B && VEX_X) { // 2 byte VEX prefix
if (VEX_B && VEX_X && !VEX_W) { // 2 byte VEX prefix
EmitByte(0xC5, CurByte, OS);
EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
return;
@ -543,7 +551,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// 3 byte VEX prefix
EmitByte(0xC4, CurByte, OS);
EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_5M, CurByte, OS);
EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
}

View File

@ -11502,3 +11502,31 @@
// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1]
vmaskmovdqu %xmm1, %xmm2
// CHECK: vmovd %xmm1, %eax
// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8]
vmovd %xmm1, %eax
// CHECK: vmovd %xmm1, (%eax)
// CHECK: encoding: [0xc5,0xf9,0x7e,0x08]
vmovd %xmm1, (%eax)
// CHECK: vmovd %eax, %xmm1
// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8]
vmovd %eax, %xmm1
// CHECK: vmovd (%eax), %xmm1
// CHECK: encoding: [0xc5,0xf9,0x6e,0x08]
vmovd (%eax), %xmm1
// CHECK: vmovq %xmm1, (%eax)
// CHECK: encoding: [0xc5,0xf9,0xd6,0x08]
vmovq %xmm1, (%eax)
// CHECK: vmovq %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1]
vmovq %xmm1, %xmm2
// CHECK: vmovq (%eax), %xmm1
// CHECK: encoding: [0xc5,0xfa,0x7e,0x08]
vmovq (%eax), %xmm1

View File

@ -1542,3 +1542,39 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe]
vmaskmovdqu %xmm14, %xmm15
// CHECK: vmovd %eax, %xmm14
// CHECK: encoding: [0xc5,0x79,0x6e,0xf0]
vmovd %eax, %xmm14
// CHECK: vmovd (%rax), %xmm14
// CHECK: encoding: [0xc5,0x79,0x6e,0x30]
vmovd (%rax), %xmm14
// CHECK: vmovd %xmm14, (%rax)
// CHECK: encoding: [0xc5,0x79,0x7e,0x30]
vmovd %xmm14, (%rax)
// CHECK: vmovd %rax, %xmm14
// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
vmovd %rax, %xmm14
// CHECK: vmovq %xmm14, (%rax)
// CHECK: encoding: [0xc5,0x79,0xd6,0x30]
vmovq %xmm14, (%rax)
// CHECK: vmovq %xmm14, %xmm12
// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6]
vmovq %xmm14, %xmm12
// CHECK: vmovq (%rax), %xmm14
// CHECK: encoding: [0xc5,0x7a,0x7e,0x30]
vmovq (%rax), %xmm14
// CHECK: vmovq %rax, %xmm14
// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
vmovq %rax, %xmm14
// CHECK: vmovq %xmm14, %rax
// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0]
vmovq %xmm14, %rax