From 338268c67fbb7252702bf400495771068750466b Mon Sep 17 00:00:00 2001 From: David Goodwin Date: Mon, 10 Aug 2009 22:17:39 +0000 Subject: [PATCH] Use NEON for single-precision int<->FP conversions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78604 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 12 +++++-- lib/Target/ARM/ARMInstrNEON.td | 60 ++++++++++++++++++++++++------- lib/Target/ARM/ARMInstrVFP.td | 12 +++---- test/CodeGen/ARM/fsitos.ll | 12 +++++++ test/CodeGen/ARM/ftosizs.ll | 12 +++++++ test/CodeGen/ARM/ftouizs.ll | 12 +++++++ test/CodeGen/ARM/fuitos.ll | 12 +++++++ 7 files changed, 111 insertions(+), 21 deletions(-) create mode 100644 test/CodeGen/ARM/fsitos.ll create mode 100644 test/CodeGen/ARM/ftosizs.ll create mode 100644 test/CodeGen/ARM/ftouizs.ll create mode 100644 test/CodeGen/ARM/fuitos.ll diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 5bc7212881c..deff83b1c99 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1118,7 +1118,7 @@ class ASuI opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, let Inst{7-4} = opcod3; } -// Single precision, unary if no NEON +// Single precision unary, if no NEON // Same as ASuI except not available if NEON is enabled class ASuIn opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> @@ -1135,7 +1135,7 @@ class ASbI opcod, dag oops, dag iops, InstrItinClass itin, let Inst{11-8} = 0b1010; } -// Single precision, binary if no NEON +// Single precision binary, if no NEON // Same as ASbI except not available if NEON is enabled class ASbIn opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> @@ -1154,6 +1154,14 @@ class AVConv1I opcod1, bits<4> opcod2, bits<4> opcod3, let Inst{6} = 1; } +// VFP conversion instructions, if no NEON +class AVConv1In opcod1, bits<4> opcod2, bits<4> opcod3, + dag oops, 
dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AVConv1I { + list Predicates = [HasVFP2,DontUseNEONForFP]; +} + class AVConvXI opcod1, bits<4> opcod2, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list pattern> diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 0d48aa65075..77bea683520 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -324,6 +324,20 @@ class N2VQ op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), NoItinerary, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>; +// Basic 2-register operations, scalar single-precision. +class N2VDs op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2V; + +class N2VDsPat + : NEONFPPat<(ResTy (OpNode SPR:$a)), + (EXTRACT_SUBREG + (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)), + arm_ssubreg_0)>; + // Basic 2-register intrinsics, both double- and quad-register. 
class N2VDInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, @@ -338,7 +352,7 @@ class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), NoItinerary, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; -// Basic 2-register operations, scalar single-precision +// Basic 2-register intrinsics, scalar single-precision class N2VDInts op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> @@ -1981,6 +1995,11 @@ let neverHasSideEffects = 1 in def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd,1>; def : N3VDsPat; +// Vector Sub Operations used for single-precision FP +let neverHasSideEffects = 1 in +def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub,0>; +def : N3VDsPat; + // Vector Multiply Operations used for single-precision FP let neverHasSideEffects = 1 in def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>; @@ -1989,31 +2008,46 @@ def : N3VDsPat; // Vector Multiply-Accumulate/Subtract used for single-precision FP let neverHasSideEffects = 1 in def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, "vmla.f32", v2f32,fmul,fadd>; -def : N3VDMulOpsPat; +def : N3VDMulOpsPat; let neverHasSideEffects = 1 in def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, "vmls.f32", v2f32,fmul,fsub>; -def : N3VDMulOpsPat; +def : N3VDMulOpsPat; -// Vector Sub Operations used for single-precision FP -let neverHasSideEffects = 1 in -def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub,0>; -def : N3VDsPat; - -// Vector Absolute for single-precision FP +// Vector Absolute used for single-precision FP let neverHasSideEffects = 1 in def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32", v2f32, v2f32, int_arm_neon_vabsf>; def : N2VDIntsPat; -// Vector Negate for 
single-precision FP - +// Vector Negate used for single-precision FP let neverHasSideEffects = 1 in def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), NoItinerary, - "vneg.f32\t$dst, $src", "", []>; + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), NoItinerary, + "vneg.f32\t$dst, $src", "", []>; def : N2VDIntsPat; +// Vector Convert between single-precision FP and integer +let neverHasSideEffects = 1 in +def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32", + v2i32, v2f32, fp_to_sint>; +def : N2VDsPat; + +let neverHasSideEffects = 1 in +def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32", + v2i32, v2f32, fp_to_uint>; +def : N2VDsPat; + +let neverHasSideEffects = 1 in +def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32", + v2f32, v2i32, sint_to_fp>; +def : N2VDsPat; + +let neverHasSideEffects = 1 in +def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32", + v2f32, v2i32, uint_to_fp>; +def : N2VDsPat; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 9eb11475830..f7c16bb9c22 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -263,7 +263,7 @@ def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), let Inst{7} = 1; } -def FSITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a), +def FSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), IIC_fpALU, "fsitos", " $dst, $a", [(set SPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; @@ -273,7 +273,7 @@ def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), IIC_fpALU, "fuitod", " $dst, $a", [(set DPR:$dst, (arm_uitof SPR:$a))]>; -def FUITOS : 
AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a), +def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), IIC_fpALU, "fuitos", " $dst, $a", [(set SPR:$dst, (arm_uitof SPR:$a))]>; @@ -287,8 +287,8 @@ def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, let Inst{7} = 1; // Z bit } -def FTOSIZS : AVConv1I<0b11101011, 0b1101, 0b1010, - (outs SPR:$dst), (ins SPR:$a), +def FTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010, + (outs SPR:$dst), (ins SPR:$a), IIC_fpALU, "ftosizs", " $dst, $a", [(set SPR:$dst, (arm_ftosi SPR:$a))]> { let Inst{7} = 1; // Z bit @@ -301,8 +301,8 @@ def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, let Inst{7} = 1; // Z bit } -def FTOUIZS : AVConv1I<0b11101011, 0b1100, 0b1010, - (outs SPR:$dst), (ins SPR:$a), +def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, + (outs SPR:$dst), (ins SPR:$a), IIC_fpALU, "ftouizs", " $dst, $a", [(set SPR:$dst, (arm_ftoui SPR:$a))]> { let Inst{7} = 1; // Z bit diff --git a/test/CodeGen/ARM/fsitos.ll b/test/CodeGen/ARM/fsitos.ll new file mode 100644 index 00000000000..7420504df6f --- /dev/null +++ b/test/CodeGen/ARM/fsitos.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fsitos\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vcvt.f32.s32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fsitos\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mcpu=cortex-a8 | grep -E {vcvt.f32.s32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mcpu=cortex-a9 | grep -E {fsitos\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(i32 %a, i32 %b) { +entry: + %0 = add i32 %a, %b + %1 = sitofp i32 %0 to float + ret float %1 +} diff --git a/test/CodeGen/ARM/ftosizs.ll b/test/CodeGen/ARM/ftosizs.ll new file mode 100644 index 00000000000..5ab77909d58 --- /dev/null +++ 
b/test/CodeGen/ARM/ftosizs.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {ftosizs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vcvt.s32.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {ftosizs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mcpu=cortex-a8 | grep -E {vcvt.s32.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mcpu=cortex-a9 | grep -E {ftosizs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define i32 @test(float %a, float %b) { +entry: + %0 = fadd float %a, %b + %1 = fptosi float %0 to i32 + ret i32 %1 +} diff --git a/test/CodeGen/ARM/ftouizs.ll b/test/CodeGen/ARM/ftouizs.ll new file mode 100644 index 00000000000..1cc5115352e --- /dev/null +++ b/test/CodeGen/ARM/ftouizs.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {ftouizs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vcvt.u32.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {ftouizs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mcpu=cortex-a8 | grep -E {vcvt.u32.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mcpu=cortex-a9 | grep -E {ftouizs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define i32 @test(float %a, float %b) { +entry: + %0 = fadd float %a, %b + %1 = fptoui float %0 to i32 + ret i32 %1 +} diff --git a/test/CodeGen/ARM/fuitos.ll b/test/CodeGen/ARM/fuitos.ll new file mode 100644 index 00000000000..0b70dc73641 --- /dev/null +++ b/test/CodeGen/ARM/fuitos.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fuitos\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E 
{vcvt.f32.u32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fuitos\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mcpu=cortex-a8 | grep -E {vcvt.f32.u32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llvm-as < %s | llc -march=arm -mcpu=cortex-a9 | grep -E {fuitos\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(i32 %a, i32 %b) { +entry: + %0 = add i32 %a, %b + %1 = uitofp i32 %0 to float + ret float %1 +}