mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-28 08:02:08 +00:00
[X86] Remove unnecessary (v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X)))) patterns
We had already disabled the pattern for SSE4.1 and SSE4.2, but it got re-enabled for AVX and AVX512. With SSE41 we rely on a separate (v4f32 (X86vzmovl VR128)) pattern to select blendps with a xorps to create zeroes, and a separate (v4f32 (scalar_to_vector FR32X)) pattern to select a COPY_TO_REGCLASS to move FR32 to VR128. The same thing can happen for AVX with vblendps, and those separate patterns already exist. For AVX512, (v4f32 (X86vzmovl VR128)) will select a VMOVSS instruction instead of VBLENDPS due to there not being an EVEX VBLENDPS. This is what we were getting out of the larger pattern anyway, so the larger pattern is unneeded for AVX512 too. For SSE1-SSSE3 we can rely on (v4f32 (X86vzmovl VR128)) selecting a MOVSS, similar to AVX512. Again, this is what the larger pattern did too. So the only real change here is that AVX1/2 now properly outputs a VBLENDPS during isel instead of a VMOVSS, to match SSE41. Most tests didn't notice because the two-address instruction pass knows how to turn VMOVSS into VBLENDPS to get an independent destination register. llvm-svn: 312564
This commit is contained in:
parent
80528702c9
commit
784fa8a4e3
@ -4606,10 +4606,6 @@ let Constraints = "$src0 = $dst" in
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
let AddedComplexity = 15 in {
|
||||
// Move scalar to XMM zero-extended, zeroing a VR128X then do a
|
||||
// MOVS{S,D} to the lower bits.
|
||||
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
|
||||
(VMOVSSZrr (v4f32 (AVX512_128_SET0)), FR32X:$src)>;
|
||||
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
|
||||
(VMOVSSZrr (v4f32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
|
||||
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
|
||||
|
@ -678,8 +678,6 @@ let Predicates = [UseSSE1] in {
|
||||
let Predicates = [NoSSE41], AddedComplexity = 15 in {
|
||||
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
|
||||
// MOVSS to the lower bits.
|
||||
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
|
||||
(MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
|
||||
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
|
||||
(MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
|
||||
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
|
||||
@ -6962,10 +6960,6 @@ let Predicates = [HasAVX2] in {
|
||||
// movs[s/d] are 1-2 byte shorter instructions.
|
||||
let Predicates = [UseAVX] in {
|
||||
let AddedComplexity = 15 in {
|
||||
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
|
||||
// MOVS{S,D} to the lower bits.
|
||||
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
|
||||
(VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
|
||||
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
|
||||
(VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
|
||||
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
|
||||
|
@ -35,9 +35,9 @@ define i16 @test1(float %f) nounwind {
|
||||
; X32_AVX1-LABEL: test1:
|
||||
; X32_AVX1: ## BB#0:
|
||||
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X32_AVX1-NEXT: vaddss LCPI0_0, %xmm0, %xmm0
|
||||
; X32_AVX1-NEXT: vmulss LCPI0_1, %xmm0, %xmm0
|
||||
; X32_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X32_AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X32_AVX1-NEXT: vminss LCPI0_2, %xmm0, %xmm0
|
||||
; X32_AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0
|
||||
@ -47,9 +47,9 @@ define i16 @test1(float %f) nounwind {
|
||||
;
|
||||
; X64_AVX1-LABEL: test1:
|
||||
; X64_AVX1: ## BB#0:
|
||||
; X64_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X64_AVX1-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX1-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X64_AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X64_AVX1-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0
|
||||
@ -60,9 +60,9 @@ define i16 @test1(float %f) nounwind {
|
||||
; X32_AVX512-LABEL: test1:
|
||||
; X32_AVX512: ## BB#0:
|
||||
; X32_AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X32_AVX512-NEXT: vaddss LCPI0_0, %xmm0, %xmm0
|
||||
; X32_AVX512-NEXT: vmulss LCPI0_1, %xmm0, %xmm0
|
||||
; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X32_AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X32_AVX512-NEXT: vminss LCPI0_2, %xmm0, %xmm0
|
||||
; X32_AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
|
||||
@ -72,9 +72,9 @@ define i16 @test1(float %f) nounwind {
|
||||
;
|
||||
; X64_AVX512-LABEL: test1:
|
||||
; X64_AVX512: ## BB#0:
|
||||
; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X64_AVX512-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X64_AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X64_AVX512-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
|
||||
|
Loading…
Reference in New Issue
Block a user