mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-26 22:34:39 +00:00
Mark FMA4 instructions as commutable and add them to the folding tables.
llvm-svn: 163035
This commit is contained in:
parent
48ba96b707
commit
2e53378ff6
@ -200,6 +200,7 @@ defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
|
||||
multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
|
||||
PatFrag mem_frag> {
|
||||
let isCommutable = 1 in
|
||||
def rr : FMA4<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, RC:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
@ -228,6 +229,7 @@ let isCodeGenOnly = 1 in
|
||||
|
||||
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
|
||||
ComplexPattern mem_cpat, Intrinsic Int> {
|
||||
let isCommutable = 1 in
|
||||
def rr_Int : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
@ -251,6 +253,7 @@ multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
|
||||
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
ValueType OpVT128, ValueType OpVT256,
|
||||
PatFrag ld_frag128, PatFrag ld_frag256> {
|
||||
let isCommutable = 1 in
|
||||
def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
@ -270,6 +273,7 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>;
|
||||
let isCommutable = 1 in
|
||||
def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
|
||||
(ins VR256:$src1, VR256:$src2, VR256:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
|
@ -1110,6 +1110,36 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 },
|
||||
{ X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 },
|
||||
// FIXME: add AVX 256-bit foldable instructions
|
||||
|
||||
// FMA4 foldable patterns
|
||||
{ X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_16 },
|
||||
{ X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_16 },
|
||||
{ X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_16 },
|
||||
{ X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_16 },
|
||||
{ X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_32 },
|
||||
{ X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_32 },
|
||||
{ X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_16 },
|
||||
{ X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_16 },
|
||||
{ X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_32 },
|
||||
{ X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_32 },
|
||||
{ X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_32 },
|
||||
{ X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_32 },
|
||||
{ X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_16 },
|
||||
{ X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_16 },
|
||||
{ X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_32 },
|
||||
{ X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_32 },
|
||||
{ X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_16 },
|
||||
{ X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_16 },
|
||||
{ X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_32 },
|
||||
{ X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_32 },
|
||||
{ X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 },
|
||||
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
|
||||
};
|
||||
|
||||
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
|
||||
@ -1237,6 +1267,36 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
|
||||
{ X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
|
||||
{ X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
|
||||
|
||||
// FMA4 foldable patterns
|
||||
{ X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 },
|
||||
{ X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 },
|
||||
{ X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 },
|
||||
{ X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 },
|
||||
{ X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 },
|
||||
{ X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 },
|
||||
{ X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 },
|
||||
{ X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 },
|
||||
{ X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 },
|
||||
{ X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 },
|
||||
{ X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 },
|
||||
{ X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 },
|
||||
{ X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 },
|
||||
{ X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 },
|
||||
{ X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
|
||||
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
|
||||
};
|
||||
|
||||
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
|
||||
|
@ -181,3 +181,32 @@ define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) {
|
||||
%res = fsub float %y, %a2
|
||||
ret float %res
|
||||
}
|
||||
|
||||
; CHECK: test_x86_fmadd_ps_load
|
||||
; CHECK: vmovaps (%rdi), %xmm2
|
||||
; CHECK: vfmadd213ps %xmm1, %xmm0, %xmm2
|
||||
; CHECK: ret
|
||||
; CHECK_FMA4: test_x86_fmadd_ps_load
|
||||
; CHECK_FMA4: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
|
||||
; CHECK_FMA4: ret
|
||||
; Loads a <4 x float> from the pointer %a0, multiplies it by %a1 and adds %a2
; (load + fmul + fadd). The CHECK_FMA4 line above expects the load to appear
; as the (%rdi) memory operand of vfmaddps rather than as a separate vmovaps.
define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
|
||||
%x = load <4 x float>* %a0
|
||||
%y = fmul <4 x float> %x, %a1
|
||||
%res = fadd <4 x float> %y, %a2
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; CHECK: test_x86_fmsub_ps_load
|
||||
; CHECK: vmovaps (%rdi), %xmm2
|
||||
; CHECK: vfmsub213ps %xmm1, %xmm0, %xmm2
|
||||
; CHECK: ret
|
||||
; CHECK_FMA4: test_x86_fmsub_ps_load
|
||||
; CHECK_FMA4: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
|
||||
; CHECK_FMA4: ret
|
||||
; Loads a <4 x float> from the pointer %a0, multiplies it by %a1 and subtracts
; %a2 (load + fmul + fsub). The CHECK_FMA4 line above expects the load to appear
; as the (%rdi) memory operand of vfmsubps rather than as a separate vmovaps.
define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
|
||||
%x = load <4 x float>* %a0
|
||||
%y = fmul <4 x float> %x, %a1
|
||||
%res = fsub <4 x float> %y, %a2
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user