From 6596a6207627ed59f568883924a21e642934c083 Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Thu, 1 Jul 2010 01:20:06 +0000
Subject: [PATCH] - Add AVX SSE2 Move doubleword and quadword instructions.

- Add encoding bits for VEX_W
- All 128-bit SSE1 & SSE2 instructions described in the .td file now have a
  working AVX-encoded form.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107365 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrFormats.td       |   9 +-
 lib/Target/X86/X86InstrInfo.h           |   6 +-
 lib/Target/X86/X86InstrSSE.td           | 123 ++++++++++++++++++++++--
 lib/Target/X86/X86MCCodeEmitter.cpp     |  18 +++-
 test/MC/AsmParser/X86/x86_32-encoding.s |  28 ++++++
 test/MC/AsmParser/X86/x86_64-encoding.s |  36 +++++++
 6 files changed, 203 insertions(+), 17 deletions(-)

diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index f54a55599c3..76e65086f3a 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -102,6 +102,7 @@ class T8     { bits<4> Prefix = 13; }
 class TA     { bits<4> Prefix = 14; }
 class TF     { bits<4> Prefix = 15; }
 class VEX    { bit hasVEXPrefix = 1; }
+class VEX_W  { bit hasVEX_WPrefix = 1; }
 class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
 
 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
@@ -130,8 +131,9 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   bit hasLockPrefix = 0;    // Does this inst have a 0xF0 prefix?
   bits<2> SegOvrBits = 0;   // Segment override prefix.
   Domain ExeDomain = d;
-  bit hasVEXPrefix = 0;     // Does this inst requires a VEX prefix?
-  bit hasVEX_4VPrefix = 0;  // Does this inst requires the VEX.VVVV field?
+  bit hasVEXPrefix = 0;     // Does this inst require a VEX prefix?
+  bit hasVEX_WPrefix = 0;   // Does this inst set the VEX_W field?
+  bit hasVEX_4VPrefix = 0;  // Does this inst require the VEX.VVVV field?
 
   // TSFlags layout should be kept in sync with X86InstrInfo.h.
   let TSFlags{5-0}   = FormBits;
@@ -146,7 +148,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   let TSFlags{23-22} = ExeDomain.Value;
   let TSFlags{31-24} = Opcode;
   let TSFlags{32}    = hasVEXPrefix;
-  let TSFlags{33}    = hasVEX_4VPrefix;
+  let TSFlags{33}    = hasVEX_WPrefix;
+  let TSFlags{34}    = hasVEX_4VPrefix;
 }
 
 class I<bits<8> o, Format f, dag outs, dag ins, string asm,
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index d1315702340..daf980eac14 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -429,9 +429,9 @@ namespace X86II {
     // address instructions in SSE are represented as 3 address ones in AVX
     // and the additional register is encoded in VEX_VVVV prefix.
     //
-    VEXShift    = 0,
-    VEX         = 1 << VEXShift,
-    VEX_4V      = 2 << VEXShift
+    VEX         = 1,
+    VEX_W       = 1 << 1,
+    VEX_4V      = 1 << 2
   };
 
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index afb2888c874..0716421fc5d 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2888,6 +2888,18 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
 // SSE2 - Move Doubleword
 //===---------------------------------------------------------------------===//
 
+// Move Int Doubleword to Packed Double Int
+let isAsmParserOnly = 1 in {
+def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                        (v4i32 (scalar_to_vector GR32:$src)))]>, VEX;
+def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
+                      VEX;
+}
 def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -2897,6 +2909,18 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                       [(set VR128:$dst,
                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
 
+
+// Move Int Doubleword to Single Scalar
+let isAsmParserOnly = 1 in {
+def VMOVDI2SSrr  : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
+
+def VMOVDI2SSrm  : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+                      VEX;
+}
 def MOVDI2SSrr  : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))]>;
@@ -2905,6 +2929,18 @@ def MOVDI2SSrm  : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
 
+// Move Packed Doubleword Int to Packed Double Int
+let isAsmParserOnly = 1 in {
+def VMOVPDI2DIrr  : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+                                        (iPTR 0)))]>, VEX;
+def VMOVPDI2DImr  : VPDI<0x7E, MRMDestMem, (outs),
+                       (ins i32mem:$dst, VR128:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(store (i32 (vector_extract (v4i32 VR128:$src),
+                                     (iPTR 0))), addr:$dst)]>, VEX;
+}
 def MOVPDI2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2914,6 +2950,15 @@ def MOVPDI2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                        [(store (i32 (vector_extract (v4i32 VR128:$src),
                                      (iPTR 0))), addr:$dst)]>;
 
+// Move Scalar Single to Double Int
+let isAsmParserOnly = 1 in {
+def VMOVSS2DIrr  : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
+def VMOVSS2DImr  : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
+}
 def MOVSS2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32:$src))]>;
@@ -2922,19 +2967,37 @@ def MOVSS2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst,
                       FR32:$src),
                       [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
 
 // movd / movq to XMM register zero-extends
+let AddedComplexity = 15, isAsmParserOnly = 1 in {
+def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (v4i32 (X86vzmovl
+                                      (v4i32 (scalar_to_vector GR32:$src)))))]>,
+                       VEX;
+def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+                       "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+                       [(set VR128:$dst, (v2i64 (X86vzmovl
+                                      (v2i64 (scalar_to_vector GR64:$src)))))]>,
+                       VEX, VEX_W;
+}
 let AddedComplexity = 15 in {
 def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (v4i32 (X86vzmovl
                                       (v4i32 (scalar_to_vector GR32:$src)))))]>;
-// This is X86-64 only.
 def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
-                       "mov{d|q}\t{$src, $dst|$dst, $src}",
+                       "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
                        [(set VR128:$dst, (v2i64 (X86vzmovl
                                       (v2i64 (scalar_to_vector GR64:$src)))))]>;
 }
 
 let AddedComplexity = 20 in {
+let isAsmParserOnly = 1 in
+def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst,
+                         (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+                                                   (loadi32 addr:$src))))))]>,
+                       VEX;
 def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
@@ -2953,12 +3016,25 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
 // SSE2 - Move Quadword
 //===---------------------------------------------------------------------===//
 
-// SSE2 instructions with XS prefix
+// Move Quadword Int to Packed Quadword Int
+let isAsmParserOnly = 1 in
+def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+                    "vmovq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst,
+                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+                    VEX, Requires<[HasAVX, HasSSE2]>;
 def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                     "movq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
-                    Requires<[HasSSE2]>;
+                    Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+
+// Move Packed Quadword Int to Quadword Int
+let isAsmParserOnly = 1 in
+def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+                        "movq\t{$src, $dst|$dst, $src}",
+                        [(store (i64 (vector_extract (v2i64 VR128:$src),
+                                      (iPTR 0))), addr:$dst)]>, VEX;
 def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -2968,17 +3044,29 @@ def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
           (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
 
 // Store / copy lower 64-bits of a XMM register.
+let isAsmParserOnly = 1 in
+def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+                       "movq\t{$src, $dst|$dst, $src}",
+                       [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
 def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
 
+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+                     "vmovq\t{$src, $dst|$dst, $src}",
+                     [(set VR128:$dst,
+                       (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+                                                 (loadi64 addr:$src))))))]>,
+                     XS, VEX, Requires<[HasAVX, HasSSE2]>;
+
 let AddedComplexity = 20 in {
 def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
-                                                 (loadi64 addr:$src))))))]>, XS,
-                    Requires<[HasSSE2]>;
+                                                 (loadi64 addr:$src))))))]>,
+                     XS, Requires<[HasSSE2]>;
 
 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
             (MOVZQI2PQIrm addr:$src)>;
@@ -2989,12 +3077,23 @@ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
 
 // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
 // IA32 document. movq xmm1, xmm2 does clear the high bits.
+let isAsmParserOnly = 1, AddedComplexity = 15 in
+def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "vmovq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+                      XS, VEX, Requires<[HasAVX, HasSSE2]>;
 let AddedComplexity = 15 in
 def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                       XS, Requires<[HasSSE2]>;
 
+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                        "vmovq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst, (v2i64 (X86vzmovl
+                                              (loadv2i64 addr:$src))))]>,
+                      XS, VEX, Requires<[HasAVX, HasSSE2]>;
 let AddedComplexity = 20 in {
 def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                         "movq\t{$src, $dst|$dst, $src}",
@@ -3006,10 +3105,22 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
             (MOVZPQILo2PQIrm addr:$src)>;
 }
 
+// Instructions to match in the assembler
+let isAsmParserOnly = 1 in {
+// These instructions are in fact aliases to movd with a 64-bit dst
+def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+                      "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+                      "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+}
+
 // Instructions for the disassembler
 // xr = XMM register
 // xm = mem64
 
+let isAsmParserOnly = 1 in
+def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                 "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
 def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  "movq\t{$src, $dst|$dst, $src}", []>, XS;
 
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 80f56e1c22b..9f36aaa28c7 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -449,6 +449,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   if (TSFlags & X86II::OpSize)
     VEX_PP = 0x01;
 
+  if ((TSFlags >> 32) & X86II::VEX_W)
+    VEX_W = 1;
+
   switch (TSFlags & X86II::Op0Mask) {
   default: assert(0 && "Invalid prefix!");
   case X86II::T8:  // 0F 38
@@ -508,15 +511,20 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
         VEX_X = 0x0;
     }
     break;
-  default: // MRM0r-MRM7r
+  default: // MRMDestReg, MRM0r-MRM7r
+    if (MI.getOperand(CurOp).isReg() &&
+        X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+      VEX_B = 0;
+
     if (HasVEX_4V)
       VEX_4V = getVEXRegisterEncoding(MI, CurOp);
 
     CurOp++;
     for (; CurOp != NumOps; ++CurOp) {
       const MCOperand &MO = MI.getOperand(CurOp);
-      if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
-        VEX_B = 0x0;
+      if (MO.isReg() && !HasVEX_4V &&
+          X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+        VEX_R = 0x0;
     }
     break;
     assert(0 && "Not implemented!");
@@ -535,7 +543,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   //
   unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
 
-  if (VEX_B && VEX_X) { // 2 byte VEX prefix
+  if (VEX_B && VEX_X && !VEX_W) { // 2 byte VEX prefix
     EmitByte(0xC5, CurByte, OS);
     EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
     return;
@@ -543,7 +551,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
 
   // 3 byte VEX prefix
   EmitByte(0xC4, CurByte, OS);
-  EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_5M, CurByte, OS);
+  EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
   EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
 }
 
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
index dccb4cbe95b..e54c9cea601 100644
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -11502,3 +11502,31 @@
 // CHECK: encoding: [0xc5,0xf9,0xf7,0xd1]
           vmaskmovdqu %xmm1, %xmm2
 
+// CHECK: vmovd %xmm1, %eax
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8]
+          vmovd %xmm1, %eax
+
+// CHECK: vmovd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x08]
+          vmovd %xmm1, (%eax)
+
+// CHECK: vmovd %eax, %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8]
+          vmovd %eax, %xmm1
+
+// CHECK: vmovd (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x08]
+          vmovd (%eax), %xmm1
+
+// CHECK: vmovq %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0xd6,0x08]
+          vmovq %xmm1, (%eax)
+
+// CHECK: vmovq %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1]
+          vmovq %xmm1, %xmm2
+
+// CHECK: vmovq (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfa,0x7e,0x08]
+          vmovq (%eax), %xmm1
+
diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s
index fc1ae648ff2..aff44ae05fb 100644
--- a/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -1542,3 +1542,39 @@ pshufb CPI1_0(%rip), %xmm1
 // CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe]
           vmaskmovdqu %xmm14, %xmm15
 
+// CHECK: vmovd %eax, %xmm14
+// CHECK: encoding: [0xc5,0x79,0x6e,0xf0]
+          vmovd %eax, %xmm14
+
+// CHECK: vmovd (%rax), %xmm14
+// CHECK: encoding: [0xc5,0x79,0x6e,0x30]
+          vmovd (%rax), %xmm14
+
+// CHECK: vmovd %xmm14, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x7e,0x30]
+          vmovd %xmm14, (%rax)
+
+// CHECK: vmovd %rax, %xmm14
+// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
+          vmovd %rax, %xmm14
+
+// CHECK: vmovq %xmm14, (%rax)
+// CHECK: encoding: [0xc5,0x79,0xd6,0x30]
+          vmovq %xmm14, (%rax)
+
+// CHECK: vmovq %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6]
+          vmovq %xmm14, %xmm12
+
+// CHECK: vmovq (%rax), %xmm14
+// CHECK: encoding: [0xc5,0x7a,0x7e,0x30]
+          vmovq (%rax), %xmm14
+
+// CHECK: vmovq %rax, %xmm14
+// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
+          vmovq %rax, %xmm14
+
+// CHECK: vmovq %xmm14, %rax
+// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0]
+          vmovq %xmm14, %rax
+
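
A note on the TSFlags plumbing in this patch: the new VEX_W/VEX_4V enumerators
in X86InstrInfo.h are defined pre-shifted so that, after "TSFlags >> 32", they
line up with TSFlags{32-34} as laid out in X86InstrFormats.td. The standalone
C++ sketch below is illustrative only; the X86II namespace and TSFlags value
here are hand-built stand-ins for what TableGen emits, not LLVM's real headers.

#include <cassert>
#include <cstdint>

namespace X86II {
// Mirrors the patched enum: flag values for bits 32+ of TSFlags.
enum {
  VEX    = 1,      // TSFlags{32} = hasVEXPrefix
  VEX_W  = 1 << 1, // TSFlags{33} = hasVEX_WPrefix
  VEX_4V = 1 << 2  // TSFlags{34} = hasVEX_4VPrefix
};
}

int main() {
  // A made-up TSFlags value such as TableGen would produce for a def
  // tagged "VEX, VEX_W" (e.g. VMOVQs64rr): bits 32 and 33 set.
  uint64_t TSFlags = (uint64_t(X86II::VEX) << 32) |
                     (uint64_t(X86II::VEX_W) << 32);

  // The exact test the patch adds to EmitVEXOpcodePrefix:
  bool HasVEX_W = (TSFlags >> 32) & X86II::VEX_W;
  assert(HasVEX_W && "hasVEX_WPrefix should be visible at bit 33");
  return 0;
}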
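The VEX_W bit also changes prefix selection in EmitVEXOpcodePrefix: the 2-byte
0xC5 form has no W field, so any instruction with VEX.W = 1 (such as the new
vmovq/movd forms taking a 64-bit GPR) must use the 3-byte 0xC4 form. Below is
a minimal sketch of that selection logic; the VEXFields struct and
emitVEXPrefix function are illustrative stand-ins, but the byte layout follows
the diff (R/X/B and vvvv are stored inverted, as in the real encoder).

#include <cstdint>
#include <vector>

struct VEXFields {
  uint8_t R = 1, X = 1, B = 1; // inverted REX.R/X/B equivalents
  uint8_t W = 0;               // the bit this patch wires up
  uint8_t VVVV = 0xF;          // inverted VEX.vvvv (0xF = unused)
  uint8_t L = 0;               // 0 = 128-bit vectors
  uint8_t PP = 0;              // 0=none, 1=0x66, 2=0xF3, 3=0xF2
  uint8_t MMMMM = 1;           // VEX_5M: 1=0F, 2=0F38, 3=0F3A
};

static void emitVEXPrefix(const VEXFields &V, std::vector<uint8_t> &Out) {
  uint8_t LastByte = V.PP | (V.L << 2) | (V.VVVV << 3);

  // 2-byte form: only legal when X and B need no extension and W is 0,
  // hence the "&& !VEX_W" the patch adds to this condition.
  if (V.B && V.X && !V.W) {
    Out.push_back(0xC5);
    Out.push_back(LastByte | (V.R << 7));
    return;
  }

  // 3-byte form; the patch also fixes byte 1 to merge in VEX.B,
  // which was previously dropped.
  Out.push_back(0xC4);
  Out.push_back((V.R << 7) | (V.X << 6) | (V.B << 5) | V.MMMMM);
  Out.push_back(LastByte | (V.W << 7));
}

For example, vmovq %rax, %xmm14 needs W=1 and an extended destination (R=0,
inverted), so with PP=1 (0x66) the sketch produces C4 61 F9, matching the
[0xc4,0x61,0xf9,0x6e,0xf0] encoding checked in the x86_64 test above; with
W=0 (and no extended index/base register), the same fields collapse to the
2-byte C5 79 prefix seen in the vmovd %eax, %xmm14 test.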