mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-20 18:56:04 +00:00
Initial support for single-precision FP using NEON. Added "neonfp" attribute to enable. Added patterns for some binary FP operations.
llvm-svn: 78081
This commit is contained in:
parent
60425b2194
commit
99adffe5f2
@ -32,6 +32,9 @@ def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
|
||||
"ARM v6t2">;
|
||||
def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
|
||||
"ARM v7A">;
|
||||
def FeatureNEONFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
|
||||
"true",
|
||||
"Use NEON for single-precision FP">;
|
||||
def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
|
||||
"Enable VFP2 instructions">;
|
||||
def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
|
||||
|
@ -1080,6 +1080,14 @@ class ASbI<bits<8> opcod, dag oops, dag iops, string opc,
|
||||
let Inst{11-8} = 0b1010;
|
||||
}
|
||||
|
||||
// Single precision, binary if NEON is not used for single-precision FP
|
||||
// Same as ASbI except not available if NEON is used for single-precision FP
|
||||
class ASbIn<bits<8> opcod, dag oops, dag iops, string opc,
|
||||
string asm, list<dag> pattern>
|
||||
: ASbI<opcod, oops, iops, opc, asm, pattern> {
|
||||
list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
|
||||
}
|
||||
|
||||
// VFP conversion instructions
|
||||
class AVConv1I<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3,
|
||||
dag oops, dag iops, string opc, string asm, list<dag> pattern>
|
||||
@ -1220,3 +1228,9 @@ class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
|
||||
class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
|
||||
dag oops, dag iops, string opc, string asm, list<dag> pattern>
|
||||
: NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, opc, asm, pattern>;
|
||||
|
||||
// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
|
||||
// for single-precision FP.
|
||||
class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
|
||||
list<Predicate> Predicates = [HasNEON,UseNEONForFP];
|
||||
}
|
||||
|
@ -104,6 +104,8 @@ def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
|
||||
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
|
||||
def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
|
||||
def HasNEON : Predicate<"Subtarget->hasNEON()">;
|
||||
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
|
||||
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
|
||||
def IsThumb : Predicate<"Subtarget->isThumb()">;
|
||||
def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
|
||||
def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
|
||||
|
@ -283,6 +283,13 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Basic 3-register operations, scalar single-precision
|
||||
class N3VDs<SDNode OpNode, NeonI Inst>
|
||||
: NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
|
||||
(EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
|
||||
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
|
||||
arm_ssubreg_0)>;
|
||||
|
||||
// Basic 3-register intrinsics, both double- and quad-register.
|
||||
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
|
||||
string OpcodeStr, ValueType ResTy, ValueType OpTy,
|
||||
@ -319,6 +326,15 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
|
||||
[(set QPR:$dst, (Ty (OpNode QPR:$src1,
|
||||
(Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
|
||||
|
||||
// Multiply-Add/Sub operations, scalar single-precision
|
||||
class N3VDMulOps<SDNode MulNode, SDNode OpNode, NeonI Inst>
|
||||
: NEONFPPat<(f32 (OpNode SPR:$acc,
|
||||
(f32 (MulNode SPR:$a, SPR:$b)))),
|
||||
(EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0),
|
||||
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
|
||||
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
|
||||
arm_ssubreg_0)>;
|
||||
|
||||
// Neon 3-argument intrinsics, both double- and quad-register.
|
||||
// The destination register is also used as the first source operand register.
|
||||
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
|
||||
@ -886,6 +902,9 @@ defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>;
|
||||
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
|
||||
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>;
|
||||
|
||||
// Vector Add Operations used for single-precision FP
|
||||
def : N3VDs<fadd, VADDfd>;
|
||||
|
||||
// Vector Multiply Operations.
|
||||
|
||||
// VMUL : Vector Multiply (integer, polynomial and floating-point)
|
||||
@ -908,6 +927,9 @@ def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, "vmull.p8", v8i16, v8i8,
|
||||
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
|
||||
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>;
|
||||
|
||||
// Vector Multiply Operations used for single-precision FP
|
||||
def : N3VDs<fmul, VMULfd>;
|
||||
|
||||
// Vector Multiply-Accumulate and Multiply-Subtract Operations.
|
||||
|
||||
// VMLA : Vector Multiply Accumulate (integer and floating-point)
|
||||
@ -929,6 +951,9 @@ defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;
|
||||
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
|
||||
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
|
||||
|
||||
// Vector Multiply-Accumulate/Subtract used for single-precision FP
|
||||
def : N3VDMulOps<fmul, fadd, VMLAfd>;
|
||||
|
||||
// Vector Subtract Operations.
|
||||
|
||||
// VSUB : Vector Subtract (integer and floating-point)
|
||||
@ -952,6 +977,9 @@ defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
|
||||
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
|
||||
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;
|
||||
|
||||
// Vector Sub Operations used for single-precision FP
|
||||
def : N3VDs<fsub, VSUBfd>;
|
||||
|
||||
// Vector Comparisons.
|
||||
|
||||
// VCEQ : Vector Compare Equal
|
||||
|
@ -98,7 +98,7 @@ def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
|
||||
"faddd", " $dst, $a, $b",
|
||||
[(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
|
||||
|
||||
def FADDS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
|
||||
def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
|
||||
"fadds", " $dst, $a, $b",
|
||||
[(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
|
||||
|
||||
@ -125,7 +125,7 @@ def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
|
||||
"fmuld", " $dst, $a, $b",
|
||||
[(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
|
||||
|
||||
def FMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
|
||||
def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
|
||||
"fmuls", " $dst, $a, $b",
|
||||
[(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
|
||||
|
||||
@ -154,7 +154,7 @@ def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
|
||||
let Inst{6} = 1;
|
||||
}
|
||||
|
||||
def FSUBS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
|
||||
def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
|
||||
"fsubs", " $dst, $a, $b",
|
||||
[(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
|
||||
let Inst{6} = 1;
|
||||
@ -317,7 +317,7 @@ def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
|
||||
[(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
|
||||
RegConstraint<"$dstin = $dst">;
|
||||
|
||||
def FMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
|
||||
def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
|
||||
"fmacs", " $dst, $a, $b",
|
||||
[(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
|
||||
RegConstraint<"$dstin = $dst">;
|
||||
@ -339,7 +339,7 @@ def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
|
||||
let Inst{6} = 1;
|
||||
}
|
||||
|
||||
def FNMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
|
||||
def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
|
||||
"fnmacs", " $dst, $a, $b",
|
||||
[(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
|
||||
RegConstraint<"$dstin = $dst"> {
|
||||
|
@ -25,6 +25,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
|
||||
bool isThumb)
|
||||
: ARMArchVersion(V4T)
|
||||
, ARMFPUType(None)
|
||||
, UseNEONForSinglePrecisionFP(false)
|
||||
, IsThumb(isThumb)
|
||||
, ThumbMode(Thumb1)
|
||||
, IsR9Reserved(ReserveR9)
|
||||
|
@ -42,6 +42,9 @@ protected:
|
||||
/// ARMFPUType - Floating Point Unit type.
|
||||
ARMFPEnum ARMFPUType;
|
||||
|
||||
/// UseNEONForSinglePrecisionFP - if NEON is available, use it for single-precision FP
|
||||
bool UseNEONForSinglePrecisionFP;
|
||||
|
||||
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
|
||||
bool IsThumb;
|
||||
|
||||
@ -98,6 +101,8 @@ protected:
|
||||
bool hasVFP2() const { return ARMFPUType >= VFPv2; }
|
||||
bool hasVFP3() const { return ARMFPUType >= VFPv3; }
|
||||
bool hasNEON() const { return ARMFPUType >= NEON; }
|
||||
bool useNEONForSinglePrecisionFP() const {
|
||||
return hasNEON() && UseNEONForSinglePrecisionFP; }
|
||||
|
||||
bool isTargetDarwin() const { return TargetType == isDarwin; }
|
||||
bool isTargetELF() const { return TargetType == isELF; }
|
||||
|
10
test/CodeGen/ARM/fadds.ll
Normal file
10
test/CodeGen/ARM/fadds.ll
Normal file
@ -0,0 +1,10 @@
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
|
||||
define float @test(float %a, float %b) {
|
||||
entry:
|
||||
%0 = fadd float %a, %b
|
||||
ret float %0
|
||||
}
|
||||
|
10
test/CodeGen/ARM/fdivs.ll
Normal file
10
test/CodeGen/ARM/fdivs.ll
Normal file
@ -0,0 +1,10 @@
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
|
||||
define float @test(float %a, float %b) {
|
||||
entry:
|
||||
%0 = fdiv float %a, %b
|
||||
ret float %0
|
||||
}
|
||||
|
11
test/CodeGen/ARM/fmacs.ll
Normal file
11
test/CodeGen/ARM/fmacs.ll
Normal file
@ -0,0 +1,11 @@
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
|
||||
define float @test(float %acc, float %a, float %b) {
|
||||
entry:
|
||||
%0 = fmul float %a, %b
|
||||
%1 = fadd float %acc, %0
|
||||
ret float %1
|
||||
}
|
||||
|
11
test/CodeGen/ARM/fmscs.ll
Normal file
11
test/CodeGen/ARM/fmscs.ll
Normal file
@ -0,0 +1,11 @@
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
|
||||
define float @test(float %acc, float %a, float %b) {
|
||||
entry:
|
||||
%0 = fmul float %a, %b
|
||||
%1 = fsub float %0, %acc
|
||||
ret float %1
|
||||
}
|
||||
|
10
test/CodeGen/ARM/fmuls.ll
Normal file
10
test/CodeGen/ARM/fmuls.ll
Normal file
@ -0,0 +1,10 @@
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
|
||||
define float @test(float %a, float %b) {
|
||||
entry:
|
||||
%0 = fmul float %a, %b
|
||||
ret float %0
|
||||
}
|
||||
|
12
test/CodeGen/ARM/fnmacs.ll
Normal file
12
test/CodeGen/ARM/fnmacs.ll
Normal file
@ -0,0 +1,12 @@
|
||||
; XFAIL: *
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
|
||||
define float @test(float %acc, float %a, float %b) {
|
||||
entry:
|
||||
%0 = fmul float %a, %b
|
||||
%1 = fsub float %acc, %0
|
||||
ret float %1
|
||||
}
|
||||
|
13
test/CodeGen/ARM/fnmscs.ll
Normal file
13
test/CodeGen/ARM/fnmscs.ll
Normal file
@ -0,0 +1,13 @@
|
||||
; XFAIL: *
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fnmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
|
||||
define float @test(float %acc, float %a, float %b) {
|
||||
entry:
|
||||
%0 = fmul float %a, %b
|
||||
%1 = fsub float 0.0, %0
|
||||
%2 = fsub float %1, %acc
|
||||
ret float %2
|
||||
}
|
||||
|
12
test/CodeGen/ARM/fnmuls.ll
Normal file
12
test/CodeGen/ARM/fnmuls.ll
Normal file
@ -0,0 +1,12 @@
|
||||
; XFAIL: *
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fnmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
|
||||
define float @test(float %a, float %b) {
|
||||
entry:
|
||||
%0 = fmul float %a, %b
|
||||
%1 = fsub float 0.0, %0
|
||||
ret float %1
|
||||
}
|
||||
|
10
test/CodeGen/ARM/fsubs.ll
Normal file
10
test/CodeGen/ARM/fsubs.ll
Normal file
@ -0,0 +1,10 @@
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
|
||||
define float @test(float %a, float %b) {
|
||||
entry:
|
||||
%0 = fsub float %a, %b
|
||||
ret float %0
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user