[X86][SSE] Consistently set MOVD/MOVQ load/store/move instructions to integer domain
We were being inconsistent with these instructions (and all their variants): a random mix of them used the default float execution domain rather than the integer domain. This patch pins all of them to SSEPackedInt.

Differential Revision: https://reviews.llvm.org/D27419

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288902 91177308-0d34-0410-b5e6-96231b3b80d8
commit d2a4d816a1
parent dbcb7adb03
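For context, a short sketch of the mechanism (assuming the 2016-era X86 backend): the ExeDomain field on an X86 instruction feeds the execution dependency fix pass, which rewrites domain-equivalent instructions so that a chain of vector operations stays in a single execution domain. The whole patch applies one mechanical pattern: wrap each MOVD/MOVQ definition in (or extend) a let block that pins it to the integer domain, shown here with a definition lifted from the first hunk below.

// Minimal TableGen sketch of the pattern this patch applies: every
// definition inside the let block inherits ExeDomain = SSEPackedInt.
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
                      EVEX;
} // ExeDomain = SSEPackedInt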
@@ -3127,12 +3127,13 @@ let Predicates = [HasVLX] in {
           (VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
 }
 
 // Move Int Doubleword to Packed Double Int
 //
+let ExeDomain = SSEPackedInt in {
 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set VR128X:$dst,
                         (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
                         EVEX;
 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
@@ -3162,43 +3163,47 @@ def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src
 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
                      IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
                      EVEX_CD8<64, CD8VT1>;
 }
+} // ExeDomain = SSEPackedInt
 
 // Move Int Doubleword to Single Scalar
 //
-let isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set FR32X:$dst, (bitconvert GR32:$src))],
                       IIC_SSE_MOVDQ>, EVEX;
 
 def VMOVDI2SSZrm  : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
                       IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
-}
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 // Move doubleword from xmm register to r/m32
 //
+let ExeDomain = SSEPackedInt in {
 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                        "vmovd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                          (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
                        EVEX;
 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                        (ins i32mem:$dst, VR128X:$src),
                        "vmovd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (extractelt (v4i32 VR128X:$src),
                                      (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
                        EVEX, EVEX_CD8<32, CD8VT1>;
+} // ExeDomain = SSEPackedInt
 
 // Move quadword from xmm1 register to r/m64
 //
+let ExeDomain = SSEPackedInt in {
 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                        "vmovq\t{$src, $dst|$dst, $src}",
                        [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                          (iPTR 0)))],
                        IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
                        Requires<[HasAVX512, In64BitMode]>;
@@ -3219,36 +3224,39 @@ def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
 
 let hasSideEffects = 0 in
 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                              (ins VR128X:$src),
                              "vmovq.s\t{$src, $dst|$dst, $src}",[]>,
                              EVEX, VEX_W;
+} // ExeDomain = SSEPackedInt
 
 // Move Scalar Single to Double Int
 //
-let isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
 def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                              (ins FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set GR32:$dst, (bitconvert FR32X:$src))],
                              IIC_SSE_MOVD_ToGP>, EVEX;
 def VMOVSS2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                              (ins i32mem:$dst, FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
                              IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
-}
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 // Move Quadword Int to Packed Quadword Int
 //
+let ExeDomain = SSEPackedInt in {
 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                               (ins i64mem:$src),
                               "vmovq\t{$src, $dst|$dst, $src}",
                               [(set VR128X:$dst,
                                 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                               EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
+} // ExeDomain = SSEPackedInt
 
 //===----------------------------------------------------------------------===//
 // AVX-512  MOVSS, MOVSD
 //===----------------------------------------------------------------------===//
 
 multiclass avx512_move_scalar<string asm, SDNode OpNode,
@@ -4626,6 +4626,7 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
 //===---------------------------------------------------------------------===//
 // Move Int Doubleword to Packed Double Int
 //
+let ExeDomain = SSEPackedInt in {
 def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -4676,11 +4677,12 @@ def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                       "mov{d|q}\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (bitconvert GR64:$src))],
                       IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
+} // ExeDomain = SSEPackedInt
 
 //===---------------------------------------------------------------------===//
 // Move Int Doubleword to Single Scalar
 //
-let isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
 def VMOVDI2SSrr  : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))],
@@ -4700,11 +4702,12 @@ let isCodeGenOnly = 1 in {
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
                       IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
-}
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 //===---------------------------------------------------------------------===//
 // Move Packed Doubleword Int to Packed Double Int
 //
+let ExeDomain = SSEPackedInt in {
 def VMOVPDI2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
@@ -4726,6 +4729,7 @@ def MOVPDI2DImr  : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                        [(store (i32 (extractelt (v4i32 VR128:$src),
                                      (iPTR 0))), addr:$dst)],
                        IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+} // ExeDomain = SSEPackedInt
 
 def : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))),
           (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
@@ -4742,6 +4746,7 @@ def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
 //===---------------------------------------------------------------------===//
 // Move Packed Doubleword Int first element to Doubleword Int
 //
+let ExeDomain = SSEPackedInt in {
 let SchedRW = [WriteMove] in {
 def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                           "movq\t{$src, $dst|$dst, $src}",
@@ -4766,11 +4771,12 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
 def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                         "mov{d|q}\t{$src, $dst|$dst, $src}",
                         [], IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+} // ExeDomain = SSEPackedInt
 
 //===---------------------------------------------------------------------===//
 // Bitcast FR64 <-> GR64
 //
-let isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
 let Predicates = [UseAVX] in
 def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
                          "movq\t{$src, $dst|$dst, $src}",
@@ -4797,12 +4803,12 @@ let isCodeGenOnly = 1 in {
                          "movq\t{$src, $dst|$dst, $src}",
                          [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
                          IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
-}
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 //===---------------------------------------------------------------------===//
 // Move Scalar Single to Double Int
 //
-let isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
 def VMOVSS2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (bitconvert FR32:$src))],
@@ -4819,7 +4825,7 @@ let isCodeGenOnly = 1 in {
                         "movd\t{$src, $dst|$dst, $src}",
                         [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
                         IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
-}
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 let Predicates = [UseAVX] in {
   let AddedComplexity = 15 in {
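The remaining hunks update tests. With these moves now reporting the integer domain, the execution dependency fix pass keeps each chain of vector operations in a single domain, so the expected output flips between domain-equivalent pairs: vmovaps/vmovdqa, vbroadcastss/vpbroadcastd, vbroadcastsd/vpbroadcastq, andps/pand, and orpd/por. In one_mask_bit_set3 the switch goes the other way: the vmovq store becomes the float-domain vmovlps to match its vextractf128 neighbour.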
@@ -19,7 +19,7 @@ define void @bad_insert(i32 %t) {
 ; CHECK-LABEL: bad_insert:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vmovaps %ymm0, (%eax)
+; CHECK-NEXT:    vmovdqa %ymm0, (%eax)
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retl
 %v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0
|
@ -793,7 +793,7 @@ define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
|
||||
; X64-AVX2-LABEL: _inreg0:
|
||||
; X64-AVX2: ## BB#0:
|
||||
; X64-AVX2-NEXT: vmovd %edi, %xmm0
|
||||
; X64-AVX2-NEXT: vbroadcastss %xmm0, %ymm0
|
||||
; X64-AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
|
||||
; X64-AVX2-NEXT: retq
|
||||
;
|
||||
; X64-AVX512VL-LABEL: _inreg0:
|
||||
@@ -1469,9 +1469,9 @@ define void @isel_crash_4d(i32* %cV_R.addr) {
 ; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-AVX2-NEXT:    movl (%rdi), %eax
 ; X64-AVX2-NEXT:    vmovd %eax, %xmm1
-; X64-AVX2-NEXT:    vbroadcastss %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
 ; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT:    vmovaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
 ; X64-AVX2-NEXT:    retq
 ;
 ; X64-AVX512VL-LABEL: isel_crash_4d:
@@ -1538,9 +1538,9 @@ define void @isel_crash_8d(i32* %cV_R.addr) {
 ; X64-AVX2-NEXT:    vmovaps %ymm0, (%rsp)
 ; X64-AVX2-NEXT:    movl (%rdi), %eax
 ; X64-AVX2-NEXT:    vmovd %eax, %xmm1
-; X64-AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
+; X64-AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
 ; X64-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
 ; X64-AVX2-NEXT:    movq %rbp, %rsp
 ; X64-AVX2-NEXT:    popq %rbp
 ; X64-AVX2-NEXT:    vzeroupper
@@ -1723,9 +1723,9 @@ define void @isel_crash_4q(i64* %cV_R.addr) {
 ; X64-AVX2-NEXT:    vmovaps %ymm0, (%rsp)
 ; X64-AVX2-NEXT:    movq (%rdi), %rax
 ; X64-AVX2-NEXT:    vmovq %rax, %xmm1
-; X64-AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
+; X64-AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
 ; X64-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
 ; X64-AVX2-NEXT:    movq %rbp, %rsp
 ; X64-AVX2-NEXT:    popq %rbp
 ; X64-AVX2-NEXT:    vzeroupper
@@ -303,7 +303,7 @@ define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind {
 ; SSE:       # BB#0:
 ; SSE-NEXT:    xorl %eax, %eax
 ; SSE-NEXT:    movd %eax, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm1
+; SSE-NEXT:    movdqa %xmm2, %xmm1
 ; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
 ; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
 ; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[2,0]
@@ -38,8 +38,8 @@ define <4 x float> @ExeDepsFix_broadcastss_inreg(<4 x float> %arg, <4 x float> %
 ; CHECK-LABEL: ExeDepsFix_broadcastss_inreg:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vmovd %edi, %xmm2
-; CHECK-NEXT:    vbroadcastss %xmm2, %xmm2
-; CHECK-NEXT:    vandps %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpbroadcastd %xmm2, %xmm2
+; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
 %bitcast = bitcast <4 x float> %arg to <4 x i32>
@@ -56,8 +56,8 @@ define <8 x float> @ExeDepsFix_broadcastss256_inreg(<8 x float> %arg, <8 x float
 ; CHECK-LABEL: ExeDepsFix_broadcastss256_inreg:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vmovd %edi, %xmm2
-; CHECK-NEXT:    vbroadcastss %xmm2, %ymm2
-; CHECK-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vpbroadcastd %xmm2, %ymm2
+; CHECK-NEXT:    vpand %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
 %bitcast = bitcast <8 x float> %arg to <8 x i32>
@@ -124,8 +124,8 @@ define <4 x double> @ExeDepsFix_broadcastsd256_inreg(<4 x double> %arg, <4 x dou
 ; CHECK-LABEL: ExeDepsFix_broadcastsd256_inreg:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vmovq %rdi, %xmm2
-; CHECK-NEXT:    vbroadcastsd %xmm2, %ymm2
-; CHECK-NEXT:    vandpd %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
+; CHECK-NEXT:    vpand %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
 %bitcast = bitcast <4 x double> %arg to <4 x i64>
@@ -244,12 +244,19 @@ define i32 @extractelement_v8i32_0(<8 x i32> %a) nounwind {
 ; SSE-NEXT:    movd %xmm1, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: extractelement_v8i32_0:
-; AVX:       # BB#0:
-; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    vzeroupper
-; AVX-NEXT:    retq
+; AVX1-LABEL: extractelement_v8i32_0:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: extractelement_v8i32_0:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 %b = extractelement <8 x i32> %a, i256 4
 ret i32 %b
 }
@@ -260,12 +267,19 @@ define i32 @extractelement_v8i32_4(<8 x i32> %a) nounwind {
 ; SSE-NEXT:    movd %xmm1, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: extractelement_v8i32_4:
-; AVX:       # BB#0:
-; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    vzeroupper
-; AVX-NEXT:    retq
+; AVX1-LABEL: extractelement_v8i32_4:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: extractelement_v8i32_4:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 %b = extractelement <8 x i32> %a, i256 4
 ret i32 %b
 }
@@ -76,7 +76,7 @@ define float @f5(float %x, i32 %y) {
 ; CHECK-LABEL: f5:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movd %edi, %xmm1
-; CHECK-NEXT:    andps %xmm1, %xmm0
+; CHECK-NEXT:    pand %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 ;
 %bc1 = bitcast float %x to i32
@@ -91,7 +91,7 @@ define float @f6(float %x, i32 %y) {
 ; CHECK-LABEL: f6:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movd %edi, %xmm1
-; CHECK-NEXT:    andps %xmm1, %xmm0
+; CHECK-NEXT:    pand %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 ;
 %bc1 = bitcast float %x to i32
@@ -135,7 +135,7 @@ define float @f8(float %x) {
 define i32 @f9(float %x, float %y) {
 ; CHECK-LABEL: f9:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andps %xmm1, %xmm0
+; CHECK-NEXT:    pand %xmm1, %xmm0
 ; CHECK-NEXT:    movd %xmm0, %eax
 ; CHECK-NEXT:    retq
 ;
@@ -1019,7 +1019,7 @@ define void @one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
 ; AVX512F-LABEL: one_mask_bit_set3:
 ; AVX512F:       ## BB#0:
 ; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX512F-NEXT:    vmovq %xmm0, 16(%rdi)
+; AVX512F-NEXT:    vmovlps %xmm0, 16(%rdi)
 ; AVX512F-NEXT:    retq
 ;
 ; SKX-LABEL: one_mask_bit_set3:
@@ -935,26 +935,14 @@ define void @merge_4i32_i32_combine(<4 x i32>* %dst, i32* %src) {
 ; SSE-LABEL: merge_4i32_i32_combine:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT:    movaps %xmm0, (%rdi)
+; SSE-NEXT:    movdqa %xmm0, (%rdi)
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: merge_4i32_i32_combine:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT:    vmovaps %xmm0, (%rdi)
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: merge_4i32_i32_combine:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX2-NEXT:    vmovaps %xmm0, (%rdi)
-; AVX2-NEXT:    retq
-;
-; AVX512F-LABEL: merge_4i32_i32_combine:
-; AVX512F:       # BB#0:
-; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512F-NEXT:    vmovdqa %xmm0, (%rdi)
-; AVX512F-NEXT:    retq
+; AVX-LABEL: merge_4i32_i32_combine:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    vmovdqa %xmm0, (%rdi)
+; AVX-NEXT:    retq
 ;
 ; X32-SSE1-LABEL: merge_4i32_i32_combine:
 ; X32-SSE1:       # BB#0:
@@ -972,7 +960,7 @@ define void @merge_4i32_i32_combine(<4 x i32>* %dst, i32* %src) {
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE41-NEXT:    movaps %xmm0, (%eax)
+; X32-SSE41-NEXT:    movdqa %xmm0, (%eax)
 ; X32-SSE41-NEXT:    retl
 %1 = getelementptr i32, i32* %src, i32 0
 %2 = load i32, i32* %1
@@ -56,7 +56,7 @@ define <16 x i8> @test5(<16 x i8> %V) {
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movl $1, %eax
 ; CHECK-NEXT:    movd %rax, %xmm1
-; CHECK-NEXT:    movaps %xmm1, (%rax)
+; CHECK-NEXT:    movdqa %xmm1, (%rax)
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [1,1]
 ; CHECK-NEXT:    movdqa %xmm1, (%rax)
 ; CHECK-NEXT:    pshufb %xmm1, %xmm0
@@ -75,7 +75,7 @@ define x86_fp80 @s32_to_x(i32 %a) nounwind {
 
 ; CHECK-LABEL: u64_to_f
 ; AVX512_32: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX512_32: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX512_32: vmovq %xmm0, {{[0-9]+}}(%esp)
 ; AVX512_32: fildll
 
 ; AVX512_64: vcvtusi2ssq
@@ -8,7 +8,7 @@ define float @test1(i32 %x) nounwind readnone {
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    orpd %xmm0, %xmm1
+; CHECK-NEXT:    por %xmm0, %xmm1
 ; CHECK-NEXT:    subsd %xmm0, %xmm1
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    cvtsd2ss %xmm1, %xmm0
@@ -2343,7 +2343,7 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
 ; SSE-NEXT:    movq %rcx, %rsi
 ; SSE-NEXT:    callq __fixtfdi
 ; SSE-NEXT:    movd %rax, %xmm0
-; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
 ; SSE-NEXT:    movq %rbx, %rdi
 ; SSE-NEXT:    movq %r14, %rsi
 ; SSE-NEXT:    callq __fixtfdi
@@ -2368,7 +2368,7 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
 ; VEX-NEXT:    movq %rcx, %rsi
 ; VEX-NEXT:    callq __fixtfdi
 ; VEX-NEXT:    vmovq %rax, %xmm0
-; VEX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; VEX-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
 ; VEX-NEXT:    movq %rbx, %rdi
 ; VEX-NEXT:    movq %r14, %rsi
 ; VEX-NEXT:    callq __fixtfdi
@@ -90,7 +90,7 @@ define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    andl $-16, %esp
 ; X32-NEXT:    subl $32, %esp
-; X32-NEXT:    movaps %xmm0, (%esp)
+; X32-NEXT:    movdqa %xmm0, (%esp)
 ; X32-NEXT:    movd %xmm0, (%esp,%eax,4)
 ; X32-NEXT:    movaps (%esp), %xmm0
 ; X32-NEXT:    movl %ebp, %esp
@@ -99,7 +99,7 @@ define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
 ;
 ; X64-LABEL: t3:
 ; X64:       # BB#0:
-; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movslq %edi, %rax
 ; X64-NEXT:    movd %xmm0, -24(%rsp,%rax,4)
 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0