mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-26 22:45:05 +00:00
[X86][SSE] Add awareness of (v)cvtpd2dq and vcvtpd2udq implicit zeroing of upper 64-bits of xmm result
We've already added the equivalent for (v)cvttpd2dq (rL284459) and vcvttpd2udq.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287835 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
38d2f633f1
commit
89e45d5649
@ -6546,13 +6546,20 @@ def : Pat<(v2f64 (X86cvtudq2pd (v4i32 VR128X:$src1))),
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512, HasVLX] in {
|
||||
let AddedComplexity = 15 in
|
||||
def : Pat<(X86vzmovl (v2i64 (bitconvert
|
||||
(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))),
|
||||
(VCVTTPD2DQZ128rr VR128:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
|
||||
(v4i32 (X86cvttpd2udq (v2f64 VR128X:$src)))))))),
|
||||
(VCVTTPD2UDQZ128rr VR128:$src)>;
|
||||
let AddedComplexity = 15 in {
|
||||
def : Pat<(X86vzmovl (v2i64 (bitconvert
|
||||
(v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
|
||||
(VCVTPD2DQZ128rr VR128:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
|
||||
(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))))),
|
||||
(VCVTPD2UDQZ128rr VR128:$src)>;
|
||||
def : Pat<(X86vzmovl (v2i64 (bitconvert
|
||||
(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))),
|
||||
(VCVTTPD2DQZ128rr VR128:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
|
||||
(v4i32 (X86cvttpd2udq (v2f64 VR128X:$src)))))))),
|
||||
(VCVTTPD2UDQZ128rr VR128:$src)>;
|
||||
}
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
|
@ -2083,10 +2083,14 @@ def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
|
||||
(VCVTTPD2DQYrm VR128:$dst, f256mem:$src), 0>;
|
||||
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
let AddedComplexity = 15 in
|
||||
def : Pat<(X86vzmovl (v2i64 (bitconvert
|
||||
(v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))),
|
||||
(VCVTTPD2DQrr VR128:$src)>;
|
||||
let AddedComplexity = 15 in {
|
||||
def : Pat<(X86vzmovl (v2i64 (bitconvert
|
||||
(v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
|
||||
(VCVTPD2DQrr VR128:$src)>;
|
||||
def : Pat<(X86vzmovl (v2i64 (bitconvert
|
||||
(v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))),
|
||||
(VCVTTPD2DQrr VR128:$src)>;
|
||||
}
|
||||
} // Predicates = [HasAVX]
|
||||
|
||||
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
@ -2101,10 +2105,14 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
|
||||
IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
|
||||
|
||||
let Predicates = [UseSSE2] in {
|
||||
let AddedComplexity = 15 in
|
||||
def : Pat<(X86vzmovl (v2i64 (bitconvert
|
||||
(v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))),
|
||||
(CVTTPD2DQrr VR128:$src)>;
|
||||
let AddedComplexity = 15 in {
|
||||
def : Pat<(X86vzmovl (v2i64 (bitconvert
|
||||
(v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
|
||||
(CVTPD2DQrr VR128:$src)>;
|
||||
def : Pat<(X86vzmovl (v2i64 (bitconvert
|
||||
(v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))),
|
||||
(CVTTPD2DQrr VR128:$src)>;
|
||||
}
|
||||
} // Predicates = [UseSSE2]
|
||||
|
||||
// Convert packed single to packed double
|
||||
|
@ -3055,8 +3055,6 @@ define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128_zext(<2 x double> %x0, <
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
|
||||
; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0]
|
||||
; CHECK-NEXT: vmovq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc0]
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[0],zero
|
||||
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
|
||||
@ -3151,8 +3149,6 @@ define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128_zext(<2 x double> %x0,
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
|
||||
; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
|
||||
; CHECK-NEXT: vmovq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc0]
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[0],zero
|
||||
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
|
||||
|
@ -255,22 +255,16 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind {
|
||||
; SSE-LABEL: test_mm_cvtpd_epi32_zext:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: cvtpd2dq %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0xe6,0xc0]
|
||||
; SSE-NEXT: movq %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x7e,0xc0]
|
||||
; SSE-NEXT: ## xmm0 = xmm0[0],zero
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_mm_cvtpd_epi32_zext:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xe6,0xc0]
|
||||
; AVX2-NEXT: vmovq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x7e,0xc0]
|
||||
; AVX2-NEXT: ## xmm0 = xmm0[0],zero
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_mm_cvtpd_epi32_zext:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0]
|
||||
; SKX-NEXT: vmovq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc0]
|
||||
; SKX-NEXT: ## xmm0 = xmm0[0],zero
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
%cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
|
||||
%res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
|
Loading…
x
Reference in New Issue
Block a user