From d2a4d816a167c14bf56852cf6c9a0632603f24e2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 7 Dec 2016 12:10:49 +0000
Subject: [PATCH] [X86][SSE] Consistently set MOVD/MOVQ load/store/move instructions to integer domain

We were handling these instructions (and all their variants) inconsistently,
with a random mix of them using the default float domain.

Differential Revision: https://reviews.llvm.org/D27419

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288902 91177308-0d34-0410-b5e6-96231b3b80d8
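For reference, the fix is mechanical: each affected group of defs is wrapped
in an explicit integer-domain block. A representative excerpt from the
X86InstrAVX512.td change is sketched below (the same idiom is applied
throughout X86InstrSSE.td as well; the two leading comment lines are
editorial annotation, not part of the original source):

  // Mark these moves as integer-domain so the execution dependency fix
  // pass schedules them on the integer unit rather than the default
  // float domain.
  let ExeDomain = SSEPackedInt in {
  def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst),
                               (ins GR32:$src),
                               "vmovd\t{$src, $dst|$dst, $src}",
                               [(set VR128X:$dst,
                                 (v4i32 (scalar_to_vector GR32:$src)))],
                               IIC_SSE_MOVDQ>, EVEX;
  } // ExeDomain = SSEPackedInt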
---
 lib/Target/X86/X86InstrAVX512.td               | 130 ++++++++++--------
 lib/Target/X86/X86InstrSSE.td                  |  18 ++-
 test/CodeGen/X86/2012-1-10-buildvector.ll      |   2 +-
 test/CodeGen/X86/avx2-vbroadcast.ll            |  14 +-
 .../X86/clear_upper_vector_element_bits.ll     |   2 +-
 test/CodeGen/X86/exedepsfix-broadcast.ll       |  12 +-
 test/CodeGen/X86/extractelement-index.ll       |  38 +++--
 test/CodeGen/X86/fp-logic.ll                   |   6 +-
 test/CodeGen/X86/masked_memop.ll               |   2 +-
 .../X86/merge-consecutive-loads-128.ll         |  26 +---
 test/CodeGen/X86/pshufb-mask-comments.ll       |   2 +-
 test/CodeGen/X86/scalar-int-to-fp.ll           |   2 +-
 test/CodeGen/X86/uint_to_fp-2.ll               |   2 +-
 test/CodeGen/X86/vec_fp_to_int.ll              |   4 +-
 test/CodeGen/X86/vec_ins_extract-1.ll          |   4 +-
 15 files changed, 140 insertions(+), 124 deletions(-)

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 92bb27f249a..a9bfdc7bf75 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -3127,12 +3127,13 @@ let Predicates = [HasVLX] in {
               (VMOVDQU32Z256mr addr:$dst, (v32i8
                (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
 }
-
-// Move Int Doubleword to Packed Double Int
-//
-def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
-                      "vmovd\t{$src, $dst|$dst, $src}",
-                      [(set VR128X:$dst,
+
+// Move Int Doubleword to Packed Double Int
+//
+let ExeDomain = SSEPackedInt in {
+def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
+                      "vmovd\t{$src, $dst|$dst, $src}",
+                      [(set VR128X:$dst,
                         (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
                         EVEX;
 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
@@ -3162,43 +3163,47 @@ def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src
 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
                          "vmovq\t{$src, $dst|$dst, $src}",
                          [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
-                         IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
-                         EVEX_CD8<64, CD8VT1>;
-}
-
-// Move Int Doubleword to Single Scalar
-//
-let isCodeGenOnly = 1 in {
-def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
-                      "vmovd\t{$src, $dst|$dst, $src}",
-                      [(set FR32X:$dst, (bitconvert GR32:$src))],
+                         IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
+                         EVEX_CD8<64, CD8VT1>;
+}
+} // ExeDomain = SSEPackedInt
+
+// Move Int Doubleword to Single Scalar
+//
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
+def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
+                      "vmovd\t{$src, $dst|$dst, $src}",
+                      [(set FR32X:$dst, (bitconvert GR32:$src))],
                       IIC_SSE_MOVDQ>, EVEX;
 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
-                      "vmovd\t{$src, $dst|$dst, $src}",
-                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
-                      IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
-}
-
-// Move doubleword from xmm register to r/m32
-//
-def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
-                       "vmovd\t{$src, $dst|$dst, $src}",
-                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
+                      "vmovd\t{$src, $dst|$dst, $src}",
+                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
+                      IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
+
+// Move doubleword from xmm register to r/m32
+//
+let ExeDomain = SSEPackedInt in {
+def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
+                       "vmovd\t{$src, $dst|$dst, $src}",
+                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                          (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
                        EVEX;
 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                        (ins i32mem:$dst, VR128X:$src),
                        "vmovd\t{$src, $dst|$dst, $src}",
-                       [(store (i32 (extractelt (v4i32 VR128X:$src),
-                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
-                       EVEX, EVEX_CD8<32, CD8VT1>;
-
-// Move quadword from xmm1 register to r/m64
-//
-def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
-                      "vmovq\t{$src, $dst|$dst, $src}",
-                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
+                       [(store (i32 (extractelt (v4i32 VR128X:$src),
+                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
+                       EVEX, EVEX_CD8<32, CD8VT1>;
+} // ExeDomain = SSEPackedInt
+
+// Move quadword from xmm1 register to r/m64
+//
+let ExeDomain = SSEPackedInt in {
+def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
+                      "vmovq\t{$src, $dst|$dst, $src}",
+                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                          (iPTR 0)))],
                       IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
                       Requires<[HasAVX512, In64BitMode]>;
@@ -3219,36 +3224,39 @@ def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
 let hasSideEffects = 0 in
 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
-                             (ins VR128X:$src),
-                             "vmovq.s\t{$src, $dst|$dst, $src}",[]>,
-                             EVEX, VEX_W;
-
-// Move Scalar Single to Double Int
-//
-let isCodeGenOnly = 1 in {
-def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
-                            (ins FR32X:$src),
-                            "vmovd\t{$src, $dst|$dst, $src}",
+                             (ins VR128X:$src),
+                             "vmovq.s\t{$src, $dst|$dst, $src}",[]>,
+                             EVEX, VEX_W;
+} // ExeDomain = SSEPackedInt
+
+// Move Scalar Single to Double Int
+//
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
+def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
+                            (ins FR32X:$src),
+                            "vmovd\t{$src, $dst|$dst, $src}",
                             [(set GR32:$dst, (bitconvert FR32X:$src))],
                             IIC_SSE_MOVD_ToGP>, EVEX;
 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                             (ins i32mem:$dst, FR32X:$src),
-                            "vmovd\t{$src, $dst|$dst, $src}",
-                            [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
-                            IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
-}
-
-// Move Quadword Int to Packed Quadword Int
-//
-def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
-                              (ins i64mem:$src),
-                              "vmovq\t{$src, $dst|$dst, $src}",
-                              [(set VR128X:$dst,
-                                (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
-                              EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 MOVSS, MOVSD
+                            "vmovd\t{$src, $dst|$dst, $src}",
+                            [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
+                            IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
+
+// Move Quadword Int to Packed Quadword Int
+//
+let ExeDomain = SSEPackedInt in {
+def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+                              (ins i64mem:$src),
+                              "vmovq\t{$src, $dst|$dst, $src}",
+                              [(set VR128X:$dst,
+                                (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
+                              EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
+} // ExeDomain = SSEPackedInt
+
+//===----------------------------------------------------------------------===//
+// AVX-512 MOVSS, MOVSD
 //===----------------------------------------------------------------------===//
 
 multiclass avx512_move_scalar
                  , Sched<[WriteMove]>;
+} // ExeDomain = SSEPackedInt
 
 //===---------------------------------------------------------------------===//
 // Move Int Doubleword to Single Scalar
 //
-let isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
 def VMOVDI2SSrr  : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))],
@@ -4700,11 +4702,12 @@ let isCodeGenOnly = 1 in {
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
                       IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
-}
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 //===---------------------------------------------------------------------===//
 // Move Packed Doubleword Int to Packed Double Int
 //
+let ExeDomain = SSEPackedInt in {
 def VMOVPDI2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
@@ -4726,6 +4729,7 @@ def MOVPDI2DImr  : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                        [(store (i32 (extractelt (v4i32 VR128:$src),
                                      (iPTR 0))), addr:$dst)],
                                      IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+} // ExeDomain = SSEPackedInt
 
 def : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))),
           (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
@@ -4742,6 +4746,7 @@ def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
 //===---------------------------------------------------------------------===//
 // Move Packed Doubleword Int first element to Doubleword Int
 //
+let ExeDomain = SSEPackedInt in {
 let SchedRW = [WriteMove] in {
 def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                           "movq\t{$src, $dst|$dst, $src}",
@@ -4766,11 +4771,12 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
 def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                         "mov{d|q}\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+} // ExeDomain = SSEPackedInt
 
 //===---------------------------------------------------------------------===//
 // Bitcast FR64 <-> GR64
 //
-let isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
   let Predicates = [UseAVX] in
   def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
                            "movq\t{$src, $dst|$dst, $src}",
@@ -4797,12 +4803,12 @@ let isCodeGenOnly = 1 in {
                            "movq\t{$src, $dst|$dst, $src}",
                            [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
                            IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
-}
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 //===---------------------------------------------------------------------===//
 // Move Scalar Single to Double Int
 //
-let isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
 def VMOVSS2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (bitconvert FR32:$src))],
@@ -4819,7 +4825,7 @@ let isCodeGenOnly = 1 in {
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
                        IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
-}
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 let Predicates = [UseAVX] in {
   let AddedComplexity = 15 in {
diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll
index 2d1b5960d98..85f4c9eaebd 100644
--- a/test/CodeGen/X86/2012-1-10-buildvector.ll
+++ b/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -19,7 +19,7 @@ define void @bad_insert(i32 %t) {
 ; CHECK-LABEL: bad_insert:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vmovaps %ymm0, (%eax)
+; CHECK-NEXT:    vmovdqa %ymm0, (%eax)
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retl
 %v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index 54f80982684..22d017e1247 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -793,7 +793,7 @@ define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
 ; X64-AVX2-LABEL: _inreg0:
 ; X64-AVX2:       ## BB#0:
 ; X64-AVX2-NEXT:    vmovd %edi, %xmm0
-; X64-AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
+; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
 ; X64-AVX2-NEXT:    retq
 ;
 ; X64-AVX512VL-LABEL: _inreg0:
@@ -1469,9 +1469,9 @@ define void @isel_crash_4d(i32* %cV_R.addr) {
 ; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-AVX2-NEXT:    movl (%rdi), %eax
 ; X64-AVX2-NEXT:    vmovd %eax, %xmm1
-; X64-AVX2-NEXT:    vbroadcastss %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
 ; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT:    vmovaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
 ; X64-AVX2-NEXT:    retq
 ;
 ; X64-AVX512VL-LABEL: isel_crash_4d:
@@ -1538,9 +1538,9 @@ define void @isel_crash_8d(i32* %cV_R.addr) {
 ; X64-AVX2-NEXT:    vmovaps %ymm0, (%rsp)
 ; X64-AVX2-NEXT:    movl (%rdi), %eax
 ; X64-AVX2-NEXT:    vmovd %eax, %xmm1
-; X64-AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
+; X64-AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
 ; X64-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
 ; X64-AVX2-NEXT:    movq %rbp, %rsp
 ; X64-AVX2-NEXT:    popq %rbp
 ; X64-AVX2-NEXT:    vzeroupper
@@ -1723,9 +1723,9 @@ define void @isel_crash_4q(i64* %cV_R.addr) {
 ; X64-AVX2-NEXT:    vmovaps %ymm0, (%rsp)
 ; X64-AVX2-NEXT:    movq (%rdi), %rax
 ; X64-AVX2-NEXT:    vmovq %rax, %xmm1
-; X64-AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
+; X64-AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
 ; X64-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
 ; X64-AVX2-NEXT:    movq %rbp, %rsp
 ; X64-AVX2-NEXT:    popq %rbp
 ; X64-AVX2-NEXT:    vzeroupper
diff --git a/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/test/CodeGen/X86/clear_upper_vector_element_bits.ll
index 9ae3483062c..693bf2e17d5 100644
--- a/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -303,7 +303,7 @@ define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind {
 ; SSE:       # BB#0:
 ; SSE-NEXT:    xorl %eax, %eax
 ; SSE-NEXT:    movd %eax, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm1
+; SSE-NEXT:    movdqa %xmm2, %xmm1
 ; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
 ; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
 ; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[2,0]
diff --git a/test/CodeGen/X86/exedepsfix-broadcast.ll b/test/CodeGen/X86/exedepsfix-broadcast.ll
index 992b3a395e7..e67bb0f9b7a 100644
--- a/test/CodeGen/X86/exedepsfix-broadcast.ll
+++ b/test/CodeGen/X86/exedepsfix-broadcast.ll
@@ -38,8 +38,8 @@ define <4 x float> @ExeDepsFix_broadcastss_inreg(<4 x float> %arg, <4 x float> %
 ; CHECK-LABEL: ExeDepsFix_broadcastss_inreg:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vmovd %edi, %xmm2
-; CHECK-NEXT:    vbroadcastss %xmm2, %xmm2
-; CHECK-NEXT:    vandps %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpbroadcastd %xmm2, %xmm2
+; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
 %bitcast = bitcast <4 x float> %arg to <4 x i32>
@@ -56,8 +56,8 @@ define <8 x float> @ExeDepsFix_broadcastss256_inreg(<8 x float> %arg, <8 x float
 ; CHECK-LABEL: ExeDepsFix_broadcastss256_inreg:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vmovd %edi, %xmm2
-; CHECK-NEXT:    vbroadcastss %xmm2, %ymm2
-; CHECK-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vpbroadcastd %xmm2, %ymm2
+; CHECK-NEXT:    vpand %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
 %bitcast = bitcast <8 x float> %arg to <8 x i32>
@@ -124,8 +124,8 @@ define <4 x double> @ExeDepsFix_broadcastsd256_inreg(<4 x double> %arg, <4 x dou
 ; CHECK-LABEL: ExeDepsFix_broadcastsd256_inreg:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vmovq %rdi, %xmm2
-; CHECK-NEXT:    vbroadcastsd %xmm2, %ymm2
-; CHECK-NEXT:    vandpd %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
+; CHECK-NEXT:    vpand %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
 %bitcast = bitcast <4 x double> %arg to <4 x i64>
diff --git a/test/CodeGen/X86/extractelement-index.ll b/test/CodeGen/X86/extractelement-index.ll
index fc34c56f6a4..13448a13ab4 100644
--- a/test/CodeGen/X86/extractelement-index.ll
+++ b/test/CodeGen/X86/extractelement-index.ll
@@ -244,12 +244,19 @@ define i32 @extractelement_v8i32_0(<8 x i32> %a) nounwind {
 ; SSE-NEXT:    movd %xmm1, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: extractelement_v8i32_0:
-; AVX:       # BB#0:
-; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    vzeroupper
-; AVX-NEXT:    retq
+; AVX1-LABEL: extractelement_v8i32_0:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: extractelement_v8i32_0:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 %b = extractelement <8 x i32> %a, i256 4
 ret i32 %b
 }
@@ -260,12 +267,19 @@ define i32 @extractelement_v8i32_4(<8 x i32> %a) nounwind {
 ; SSE-NEXT:    movd %xmm1, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: extractelement_v8i32_4:
-; AVX:       # BB#0:
-; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    vzeroupper
-; AVX-NEXT:    retq
+; AVX1-LABEL: extractelement_v8i32_4:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: extractelement_v8i32_4:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 %b = extractelement <8 x i32> %a, i256 4
 ret i32 %b
 }
diff --git a/test/CodeGen/X86/fp-logic.ll b/test/CodeGen/X86/fp-logic.ll
index 301fa8f4137..d940101ecf5 100644
--- a/test/CodeGen/X86/fp-logic.ll
+++ b/test/CodeGen/X86/fp-logic.ll
@@ -76,7 +76,7 @@ define float @f5(float %x, i32 %y) {
 ; CHECK-LABEL: f5:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movd %edi, %xmm1
-; CHECK-NEXT:    andps %xmm1, %xmm0
+; CHECK-NEXT:    pand %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 ;
 %bc1 = bitcast float %x to i32
@@ -91,7 +91,7 @@ define float @f6(float %x, i32 %y) {
 ; CHECK-LABEL: f6:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movd %edi, %xmm1
-; CHECK-NEXT:    andps %xmm1, %xmm0
+; CHECK-NEXT:    pand %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 ;
 %bc1 = bitcast float %x to i32
@@ -135,7 +135,7 @@ define float @f8(float %x) {
 define i32 @f9(float %x, float %y) {
 ; CHECK-LABEL: f9:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andps %xmm1, %xmm0
+; CHECK-NEXT:    pand %xmm1, %xmm0
 ; CHECK-NEXT:    movd %xmm0, %eax
 ; CHECK-NEXT:    retq
 ;
diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
index 98b1e87ef59..7e1837c42a9 100644
--- a/test/CodeGen/X86/masked_memop.ll
+++ b/test/CodeGen/X86/masked_memop.ll
@@ -1019,7 +1019,7 @@ define void @one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
 ; AVX512F-LABEL: one_mask_bit_set3:
 ; AVX512F:       ## BB#0:
 ; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX512F-NEXT:    vmovq %xmm0, 16(%rdi)
+; AVX512F-NEXT:    vmovlps %xmm0, 16(%rdi)
 ; AVX512F-NEXT:    retq
 ;
 ; SKX-LABEL: one_mask_bit_set3:
diff --git a/test/CodeGen/X86/merge-consecutive-loads-128.ll b/test/CodeGen/X86/merge-consecutive-loads-128.ll
index bb60440c9e9..1f2bd4bb0dc 100644
--- a/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -935,26 +935,14 @@ define void @merge_4i32_i32_combine(<4 x i32>* %dst, i32* %src) {
 ; SSE-LABEL: merge_4i32_i32_combine:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT:    movaps %xmm0, (%rdi)
+; SSE-NEXT:    movdqa %xmm0, (%rdi)
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: merge_4i32_i32_combine:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT:    vmovaps %xmm0, (%rdi)
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: merge_4i32_i32_combine:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX2-NEXT:    vmovaps %xmm0, (%rdi)
-; AVX2-NEXT:    retq
-;
-; AVX512F-LABEL: merge_4i32_i32_combine:
-; AVX512F:       # BB#0:
-; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512F-NEXT:    vmovdqa %xmm0, (%rdi)
-; AVX512F-NEXT:    retq
+; AVX-LABEL: merge_4i32_i32_combine:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    vmovdqa %xmm0, (%rdi)
+; AVX-NEXT:    retq
 ;
 ; X32-SSE1-LABEL: merge_4i32_i32_combine:
 ; X32-SSE1:       # BB#0:
@@ -972,7 +960,7 @@ define void @merge_4i32_i32_combine(<4 x i32>* %dst, i32* %src) {
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE41-NEXT:    movaps %xmm0, (%eax)
+; X32-SSE41-NEXT:    movdqa %xmm0, (%eax)
 ; X32-SSE41-NEXT:    retl
 %1 = getelementptr i32, i32* %src, i32 0
 %2 = load i32, i32* %1
diff --git a/test/CodeGen/X86/pshufb-mask-comments.ll b/test/CodeGen/X86/pshufb-mask-comments.ll
index 8364915fa0d..d447bf9b9b8 100644
--- a/test/CodeGen/X86/pshufb-mask-comments.ll
+++ b/test/CodeGen/X86/pshufb-mask-comments.ll
@@ -56,7 +56,7 @@ define <16 x i8> @test5(<16 x i8> %V) {
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movl $1, %eax
 ; CHECK-NEXT:    movd %rax, %xmm1
-; CHECK-NEXT:    movaps %xmm1, (%rax)
+; CHECK-NEXT:    movdqa %xmm1, (%rax)
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [1,1]
 ; CHECK-NEXT:    movdqa %xmm1, (%rax)
 ; CHECK-NEXT:    pshufb %xmm1, %xmm0
diff --git a/test/CodeGen/X86/scalar-int-to-fp.ll b/test/CodeGen/X86/scalar-int-to-fp.ll
index 9ea86b08f7a..d39b206f3aa 100644
--- a/test/CodeGen/X86/scalar-int-to-fp.ll
+++ b/test/CodeGen/X86/scalar-int-to-fp.ll
@@ -75,7 +75,7 @@ define x86_fp80 @s32_to_x(i32 %a) nounwind {
 ; CHECK-LABEL: u64_to_f
 ; AVX512_32:      vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX512_32:      vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX512_32:      vmovq %xmm0, {{[0-9]+}}(%esp)
 ; AVX512_32:      fildll
 ; AVX512_64:      vcvtusi2ssq
diff --git a/test/CodeGen/X86/uint_to_fp-2.ll b/test/CodeGen/X86/uint_to_fp-2.ll
index d2b78a8886f..fbfd1bdd9d0 100644
--- a/test/CodeGen/X86/uint_to_fp-2.ll
+++ b/test/CodeGen/X86/uint_to_fp-2.ll
@@ -8,7 +8,7 @@ define float @test1(i32 %x) nounwind readnone {
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    orpd %xmm0, %xmm1
+; CHECK-NEXT:    por %xmm0, %xmm1
 ; CHECK-NEXT:    subsd %xmm0, %xmm1
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    cvtsd2ss %xmm1, %xmm0
diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll
index 4641e4a956a..4ae95ba5437 100644
--- a/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/test/CodeGen/X86/vec_fp_to_int.ll
@@ -2343,7 +2343,7 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
 ; SSE-NEXT:    movq %rcx, %rsi
 ; SSE-NEXT:    callq __fixtfdi
 ; SSE-NEXT:    movd %rax, %xmm0
-; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
 ; SSE-NEXT:    movq %rbx, %rdi
 ; SSE-NEXT:    movq %r14, %rsi
 ; SSE-NEXT:    callq __fixtfdi
@@ -2368,7 +2368,7 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
 ; VEX-NEXT:    movq %rcx, %rsi
 ; VEX-NEXT:    callq __fixtfdi
 ; VEX-NEXT:    vmovq %rax, %xmm0
-; VEX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; VEX-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
 ; VEX-NEXT:    movq %rbx, %rdi
 ; VEX-NEXT:    movq %r14, %rsi
 ; VEX-NEXT:    callq __fixtfdi
diff --git a/test/CodeGen/X86/vec_ins_extract-1.ll b/test/CodeGen/X86/vec_ins_extract-1.ll
index 8019e11ad4c..8adc0e61f8a 100644
--- a/test/CodeGen/X86/vec_ins_extract-1.ll
+++ b/test/CodeGen/X86/vec_ins_extract-1.ll
@@ -90,7 +90,7 @@ define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    andl $-16, %esp
 ; X32-NEXT:    subl $32, %esp
-; X32-NEXT:    movaps %xmm0, (%esp)
+; X32-NEXT:    movdqa %xmm0, (%esp)
 ; X32-NEXT:    movd %xmm0, (%esp,%eax,4)
 ; X32-NEXT:    movaps (%esp), %xmm0
 ; X32-NEXT:    movl %ebp, %esp
@@ -99,7 +99,7 @@ define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
 ;
 ; X64-LABEL: t3:
 ; X64:       # BB#0:
-; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movslq %edi, %rax
 ; X64-NEXT:    movd %xmm0, -24(%rsp,%rax,4)
 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0