mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-13 14:46:53 +00:00
[X86] When pattern-matching scalar FMA3 intrinsics, don't re-arrange the first and second operands.
The semantics of the scalar FMA intrinsics are that the high vector elements are copied from the first source. The existing pattern switches src1 and src2 around, to match the "213" order, which ends up tying the original src2 to the dest. Since the actual scalar fma3 instructions copy the high elements from the dest register, the wrong values are copied. This modifies the pattern to leave src1 and src2 in their original order.

Differential Revision: http://reviews.llvm.org/D9908

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238131 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
17b7d6bf25
commit
8ffbb68a86
@ -183,19 +183,24 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
|
||||
defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "PD", IntF64, OpNode,
|
||||
FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
|
||||
|
||||
// These patterns use the 123 ordering, instead of 213, even though
|
||||
// they match the intrinsic to the 213 version of the instruction.
|
||||
// This is because src1 is tied to dest, and the scalar intrinsics
|
||||
// require the pass-through values to come from the first source
|
||||
// operand, not the second.
|
||||
def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
          (COPY_TO_REGCLASS
            (!cast<Instruction>(NAME#"SSr213r")
              // Operands are forwarded in intrinsic order (1, 2, 3): $src1 is
              // the operand tied to dest, so it must stay in the first slot.
              (COPY_TO_REGCLASS $src1, FR32),
              (COPY_TO_REGCLASS $src2, FR32),
              (COPY_TO_REGCLASS $src3, FR32)),
            VR128)>;
|
||||
|
||||
// Double-precision counterpart of the scalar intrinsic pattern: operands are
// forwarded in intrinsic order (1, 2, 3) so that $src1, the operand tied to
// dest, stays in the first slot of the 213-form instruction.
def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
          (COPY_TO_REGCLASS
            (!cast<Instruction>(NAME#"SDr213r")
              (COPY_TO_REGCLASS $src1, FR64),
              (COPY_TO_REGCLASS $src2, FR64),
              (COPY_TO_REGCLASS $src3, FR64)),
            VR128)>;
|
||||
}
|
||||
|
@ -3,7 +3,9 @@
|
||||
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s
|
||||
|
||||
define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; Capture the registers loaded from (%rcx) and (%rdx), then require them,
  ; in that order, as the register operands of the FMA. Matching only the
  ; mnemonic would not detect swapped operands.
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fmadd213ss (%r8), [[XMM1]], [[XMM0]]
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
}
|
||||
@ -24,7 +26,9 @@ define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f
|
||||
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; Capture the registers loaded from (%rcx) and (%rdx), then require them,
  ; in that order, as the register operands of the FMA. Matching only the
  ; mnemonic would not detect swapped operands.
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fnmadd213ss (%r8), [[XMM1]], [[XMM0]]
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
}
|
||||
@ -46,7 +50,9 @@ declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x
|
||||
|
||||
|
||||
define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; Capture the registers loaded from (%rcx) and (%rdx), then require them,
  ; in that order, as the register operands of the FMA. Matching only the
  ; mnemonic would not detect swapped operands.
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fmsub213ss (%r8), [[XMM1]], [[XMM0]]
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
}
|
||||
@ -60,7 +66,9 @@ define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x flo
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; Capture the registers loaded from (%rcx) and (%rdx), then require them,
  ; in that order, as the register operands of the FMA. Matching only the
  ; mnemonic would not detect swapped operands.
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fnmsub213ss (%r8), [[XMM1]], [[XMM0]]
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
}
|
||||
@ -76,7 +84,9 @@ declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x floa
|
||||
;;;;
|
||||
|
||||
define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; Capture the registers loaded from (%rcx) and (%rdx), then require them,
  ; in that order, as the register operands of the FMA. Matching only the
  ; mnemonic would not detect swapped operands.
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fmadd213sd (%r8), [[XMM1]], [[XMM0]]
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
  ret <2 x double> %res
}
|
||||
@ -90,7 +100,9 @@ define <2 x double> @test_x86_fmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x
|
||||
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; Capture the registers loaded from (%rcx) and (%rdx), then require them,
  ; in that order, as the register operands of the FMA. Matching only the
  ; mnemonic would not detect swapped operands.
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fnmadd213sd (%r8), [[XMM1]], [[XMM0]]
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
  ret <2 x double> %res
}
|
||||
@ -106,7 +118,9 @@ declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x d
|
||||
|
||||
|
||||
define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; Capture the registers loaded from (%rcx) and (%rdx), then require them,
  ; in that order, as the register operands of the FMA. Matching only the
  ; mnemonic would not detect swapped operands.
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fmsub213sd (%r8), [[XMM1]], [[XMM0]]
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
  ret <2 x double> %res
}
|
||||
@ -120,7 +134,9 @@ define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; Capture the registers loaded from (%rcx) and (%rdx), then require them,
  ; in that order, as the register operands of the FMA. Matching only the
  ; mnemonic would not detect swapped operands.
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fnmsub213sd (%r8), [[XMM1]], [[XMM0]]
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
  ret <2 x double> %res
}
|
||||
|
Loading…
Reference in New Issue
Block a user