[X86] Add a missing FMA3 scalar intrinsic pattern.

This allows us to use the 231 form to fold an insertelement on the add input of the FMA. Technically, no software intrinsic can produce this pattern until AVX512F, but it can be built up manually from other intrinsics.

llvm-svn: 337223
This commit is contained in:
Craig Topper 2018-07-16 23:10:58 +00:00
parent 5c92980d35
commit b41eac7471
2 changed files with 38 additions and 0 deletions

View File

@ -340,6 +340,13 @@ multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
// 231 form: the scalar Op takes $src2 and $src3 as its first two operands and
// element 0 of $src1 as its third operand, and the scalar result is merged
// back into $src1 through Move. Selects the Prefix#"231"#Suffix#"r_Int"
// instruction with $src1 passed as the first (vector) operand and the two
// scalar operands copied into VR128.
// NOTE(review): assumes the third operand of Op is the addend, which is what
// makes "231" the matching operand order -- confirm against the Op nodes this
// multiclass is instantiated with.
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2, RC:$src3,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
(!cast<Instruction>(Prefix#"231"#Suffix#"r_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0))),

View File

@ -64,6 +64,37 @@ define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1,
ret <4 x float> %5
}
; Scalar llvm.fma.f32 on element 0 of %a0, %a1 and %a2, with the result
; inserted back into element 0 of %a2, i.e. the same vector that supplied the
; third fma operand. The assertions below expect vfmadd231ss to be selected
; for all three run configurations.
define <4 x float> @test_x86_fma_vfmadd_ss_231(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss_231:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 # encoding: [0xc4,0xe2,0x79,0xb9,0xd1]
; CHECK-FMA-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss_231:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0xd1]
; CHECK-AVX512VL-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss_231:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%r8), %xmm0 # encoding: [0xc4,0xc1,0x78,0x28,0x00]
; CHECK-FMA-WIN-NEXT: vmovss (%rcx), %xmm1 # encoding: [0xc5,0xfa,0x10,0x09]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT: vfmadd231ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xb9,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * mem) + xmm0
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
%1 = extractelement <4 x float> %a0, i64 0
%2 = extractelement <4 x float> %a1, i64 0
%3 = extractelement <4 x float> %a2, i64 0
%4 = call float @llvm.fma.f32(float %1, float %2, float %3)
%5 = insertelement <4 x float> %a2, float %4, i64 0
ret <4 x float> %5
}
define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-FMA: # %bb.0: