From 99adffe5f20b61b012ede34859f92de3ab2a41cc Mon Sep 17 00:00:00 2001 From: David Goodwin Date: Tue, 4 Aug 2009 17:53:06 +0000 Subject: [PATCH] Initial support for single-precision FP using NEON. Added "neonfp" attribute to enable. Added patterns for some binary FP operations. llvm-svn: 78081 --- lib/Target/ARM/ARM.td | 3 +++ lib/Target/ARM/ARMInstrFormats.td | 14 ++++++++++++++ lib/Target/ARM/ARMInstrInfo.td | 2 ++ lib/Target/ARM/ARMInstrNEON.td | 28 ++++++++++++++++++++++++++++ lib/Target/ARM/ARMInstrVFP.td | 30 +++++++++++++++--------------- lib/Target/ARM/ARMSubtarget.cpp | 1 + lib/Target/ARM/ARMSubtarget.h | 7 ++++++- test/CodeGen/ARM/fadds.ll | 10 ++++++++++ test/CodeGen/ARM/fdivs.ll | 10 ++++++++++ test/CodeGen/ARM/fmacs.ll | 11 +++++++++++ test/CodeGen/ARM/fmscs.ll | 11 +++++++++++ test/CodeGen/ARM/fmuls.ll | 10 ++++++++++ test/CodeGen/ARM/fnmacs.ll | 12 ++++++++++++ test/CodeGen/ARM/fnmscs.ll | 13 +++++++++++++ test/CodeGen/ARM/fnmuls.ll | 12 ++++++++++++ test/CodeGen/ARM/fsubs.ll | 10 ++++++++++ 16 files changed, 168 insertions(+), 16 deletions(-) create mode 100644 test/CodeGen/ARM/fadds.ll create mode 100644 test/CodeGen/ARM/fdivs.ll create mode 100644 test/CodeGen/ARM/fmacs.ll create mode 100644 test/CodeGen/ARM/fmscs.ll create mode 100644 test/CodeGen/ARM/fmuls.ll create mode 100644 test/CodeGen/ARM/fnmacs.ll create mode 100644 test/CodeGen/ARM/fnmscs.ll create mode 100644 test/CodeGen/ARM/fnmuls.ll create mode 100644 test/CodeGen/ARM/fsubs.ll diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 8c987c268df..172c7de6259 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -32,6 +32,9 @@ def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", "ARM v6t2">; def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", "ARM v7A">; +def FeatureNEONFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", + "true", + "Use NEON for single-precision FP">; def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2", "Enable VFP2 instructions">; def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3", diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index fe32c5f669b..de2bb78bb41 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1080,6 +1080,14 @@ class ASbI opcod, dag oops, dag iops, string opc, let Inst{11-8} = 0b1010; } +// Single precision, binary if no NEON +// Same as ASbI except not available if NEON is enabled +class ASbIn opcod, dag oops, dag iops, string opc, + string asm, list pattern> + : ASbI { + list Predicates = [HasVFP2,DontUseNEONForFP]; +} + // VFP conversion instructions class AVConv1I opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, string opc, string asm, list pattern> @@ -1220,3 +1228,9 @@ class NVSetLane opcod1, bits<4> opcod2, bits<2> opcod3, class NVDup opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, string opc, string asm, list pattern> : NVLaneOp; + +// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON +// for single-precision FP. +class NEONFPPat : Pat { + list Predicates = [HasNEON,UseNEONForFP]; +} diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index db3afba57bd..e4a95a74e79 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -104,6 +104,8 @@ def HasV7 : Predicate<"Subtarget->hasV7Ops()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; +def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; +def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">; def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; def IsThumb2 : Predicate<"Subtarget->isThumb2()">; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 5e8a4b5994f..ec4702f5d69 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -283,6 +283,13 @@ class N3VQ op21_20, bits<4> op11_8, bit op4, let isCommutable = Commutable; } +// Basic 3-register operations, scalar single-precision +class N3VDs + : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)), + arm_ssubreg_0)>; + // Basic 3-register intrinsics, both double- and quad-register. class N3VDInt op21_20, bits<4> op11_8, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, @@ -319,6 +326,15 @@ class N3VQMulOp op21_20, bits<4> op11_8, bit op4, [(set QPR:$dst, (Ty (OpNode QPR:$src1, (Ty (MulOp QPR:$src2, QPR:$src3)))))]>; +// Multiply-Add/Sub operations, scalar single-precision +class N3VDMulOps + : NEONFPPat<(f32 (OpNode SPR:$acc, + (f32 (MulNode SPR:$a, SPR:$b)))), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)), + arm_ssubreg_0)>; + // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. class N3VDInt3 op21_20, bits<4> op11_8, bit op4, @@ -886,6 +902,9 @@ defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>; +// Vector Add Operations used for single-precision FP +def : N3VDs; + // Vector Multiply Operations. // VMUL : Vector Multiply (integer, polynomial and floating-point) @@ -908,6 +927,9 @@ def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, "vmull.p8", v8i16, v8i8, // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>; +// Vector Multiply Operations used for single-precision FP +def : N3VDs; + // Vector Multiply-Accumulate and Multiply-Subtract Operations. // VMLA : Vector Multiply Accumulate (integer and floating-point) @@ -929,6 +951,9 @@ defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>; +// Vector Multiply-Accumulate/Subtract used for single-precision FP +def : N3VDMulOps; + // Vector Subtract Operations. // VSUB : Vector Subtract (integer and floating-point) @@ -952,6 +977,9 @@ defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>; +// Vector Sub Operations used for single-precision FP +def : N3VDs; + // Vector Comparisons. // VCEQ : Vector Compare Equal diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index a9b4a32f17f..2ecf5f3a420 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -98,9 +98,9 @@ def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), "faddd", " $dst, $a, $b", [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; -def FADDS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - "fadds", " $dst, $a, $b", - [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; +def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fadds", " $dst, $a, $b", + [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; // These are encoded as unary instructions. let Defs = [FPSCR] in { @@ -125,9 +125,9 @@ def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), "fmuld", " $dst, $a, $b", [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; -def FMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - "fmuls", " $dst, $a, $b", - [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; +def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fmuls", " $dst, $a, $b", + [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), "fnmuld", " $dst, $a, $b", @@ -154,9 +154,9 @@ def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), let Inst{6} = 1; } -def FSUBS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - "fsubs", " $dst, $a, $b", - [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { +def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fsubs", " $dst, $a, $b", + [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { let Inst{6} = 1; } @@ -317,10 +317,10 @@ def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - "fmacs", " $dst, $a, $b", - [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, - RegConstraint<"$dstin = $dst">; +def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fmacs", " $dst, $a, $b", + [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), "fmscd", " $dst, $a, $b", @@ -339,8 +339,8 @@ def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), let Inst{6} = 1; } -def FNMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - "fnmacs", " $dst, $a, $b", +def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fnmacs", " $dst, $a, $b", [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 71c77e10682..4e706c5b39c 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -25,6 +25,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, bool isThumb) : ARMArchVersion(V4T) , ARMFPUType(None) + , UseNEONForSinglePrecisionFP(false) , IsThumb(isThumb) , ThumbMode(Thumb1) , IsR9Reserved(ReserveR9) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 4ec77ff93e6..6d1ffc44206 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -42,6 +42,9 @@ protected: /// ARMFPUType - Floating Point Unit type. ARMFPEnum ARMFPUType; + /// UseNEONForSinglePrecisionFP - if NEON is available use for FP + bool UseNEONForSinglePrecisionFP; + /// IsThumb - True if we are in thumb mode, false if in ARM mode. bool IsThumb; @@ -98,7 +101,9 @@ protected: bool hasVFP2() const { return ARMFPUType >= VFPv2; } bool hasVFP3() const { return ARMFPUType >= VFPv3; } bool hasNEON() const { return ARMFPUType >= NEON; } - + bool useNEONForSinglePrecisionFP() const { + return hasNEON() && UseNEONForSinglePrecisionFP; } + bool isTargetDarwin() const { return TargetType == isDarwin; } bool isTargetELF() const { return TargetType == isELF; } diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll new file mode 100644 index 00000000000..35c74f78d29 --- /dev/null +++ b/test/CodeGen/ARM/fadds.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %a, float %b) { +entry: + %0 = fadd float %a, %b + ret float %0 +} + diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll new file mode 100644 index 00000000000..9637ccb9613 --- /dev/null +++ b/test/CodeGen/ARM/fdivs.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %a, float %b) { +entry: + %0 = fdiv float %a, %b + ret float %0 +} + diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll new file mode 100644 index 00000000000..24517e1c5dc --- /dev/null +++ b/test/CodeGen/ARM/fmacs.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %acc, float %a, float %b) { +entry: + %0 = fmul float %a, %b + %1 = fadd float %acc, %0 + ret float %1 +} + diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll new file mode 100644 index 00000000000..5338f44e979 --- /dev/null +++ b/test/CodeGen/ARM/fmscs.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %acc, float %a, float %b) { +entry: + %0 = fmul float %a, %b + %1 = fsub float %0, %acc + ret float %1 +} + diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll new file mode 100644 index 00000000000..24c04abd1da --- /dev/null +++ b/test/CodeGen/ARM/fmuls.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %a, float %b) { +entry: + %0 = fmul float %a, %b + ret float %0 +} + diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll new file mode 100644 index 00000000000..537c4110ba4 --- /dev/null +++ b/test/CodeGen/ARM/fnmacs.ll @@ -0,0 +1,12 @@ +; XFAIL: * +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %acc, float %a, float %b) { +entry: + %0 = fmul float %a, %b + %1 = fsub float %acc, %0 + ret float %1 +} + diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll new file mode 100644 index 00000000000..da3b95f7336 --- /dev/null +++ b/test/CodeGen/ARM/fnmscs.ll @@ -0,0 +1,13 @@ +; XFAIL: * +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fnmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %acc, float %a, float %b) { +entry: + %0 = fmul float %a, %b + %1 = fsub float 0.0, %0 + %2 = fsub float %1, %acc + ret float %2 +} + diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll new file mode 100644 index 00000000000..7130aa6be65 --- /dev/null +++ b/test/CodeGen/ARM/fnmuls.ll @@ -0,0 +1,12 @@ +; XFAIL: * +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fnmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %a, float %b) { +entry: + %0 = fmul float %a, %b + %1 = fsub float 0.0, %0 + ret float %1 +} + diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll new file mode 100644 index 00000000000..e3182374018 --- /dev/null +++ b/test/CodeGen/ARM/fsubs.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %a, float %b) { +entry: + %0 = fsub float %a, %b + ret float %0 +} +