From 2e53378ff65464d7e5a56b3b2cdbadce45e95b0f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 31 Aug 2012 23:10:34 +0000 Subject: [PATCH] Mark FMA4 instructions as commutable and add them to the folding tables. llvm-svn: 163035 --- lib/Target/X86/X86InstrFMA.td | 4 +++ lib/Target/X86/X86InstrInfo.cpp | 60 ++++++++++++++++++++++++++++++++ test/CodeGen/X86/fma_patterns.ll | 29 +++++++++++++++ 3 files changed, 93 insertions(+) diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index f9823fb5d37..56638002d8e 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -200,6 +200,7 @@ defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, multiclass fma4s opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, ValueType OpVT, SDNode OpNode, PatFrag mem_frag> { + let isCommutable = 1 in def rr : FMA4 opc, string OpcodeStr, Operand memop, ComplexPattern mem_cpat, Intrinsic Int> { + let isCommutable = 1 in def rr_Int : FMA4 opc, string OpcodeStr, Operand memop, multiclass fma4p opc, string OpcodeStr, SDNode OpNode, ValueType OpVT128, ValueType OpVT256, PatFrag ld_frag128, PatFrag ld_frag256> { + let isCommutable = 1 in def rr : FMA4 opc, string OpcodeStr, SDNode OpNode, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>; + let isCommutable = 1 in def rrY : FMA4 @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) { + %x = load <4 x float>* %a0 + %y = fmul <4 x float> %x, %a1 + %res = fadd <4 x float> %y, %a2 + ret <4 x float> %res +} + +; CHECK: test_x86_fmsub_ps +; CHECK: vmovaps (%rdi), %xmm2 +; CHECK: fmsub213ps %xmm1, %xmm0, %xmm2 +; CHECK: ret +; CHECK_FMA4: test_x86_fmsub_ps +; CHECK_FMA4: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 +; CHECK_FMA4: ret +define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) { + %x = load <4 x float>* %a0 + %y = fmul <4 x float> %x, %a1 + %res = fsub <4 x float> %y, %a2 + ret <4 x float> %res +} +