mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-10 14:12:11 +00:00
[ARM] Add scheduling info for VFMS
The scalar VFMS instructions did not have scheduling information attached (but VFMA did), which was causing assertion failures with the Cortex-A57 scheduling model and -fp-contract=fast. Differential Revision: https://reviews.llvm.org/D34040 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305064 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1a8e4006cb
commit
14310e3851
@ -1958,7 +1958,8 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0,
|
||||
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
|
||||
(f64 DPR:$Ddin)))]>,
|
||||
RegConstraint<"$Ddin = $Dd">,
|
||||
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
|
||||
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
|
||||
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
||||
|
||||
def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
||||
@ -1966,7 +1967,8 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
|
||||
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
|
||||
SPR:$Sdin))]>,
|
||||
RegConstraint<"$Sdin = $Sd">,
|
||||
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
|
||||
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
|
||||
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines.
|
||||
}
|
||||
@ -1976,7 +1978,8 @@ def VFMSH : AHbI<0b11101, 0b10, 1, 0,
|
||||
IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm",
|
||||
[]>,
|
||||
RegConstraint<"$Sdin = $Sd">,
|
||||
Requires<[HasFullFP16,UseFusedMAC]>;
|
||||
Requires<[HasFullFP16,UseFusedMAC]>,
|
||||
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
||||
|
||||
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
||||
(VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
|
||||
|
@ -1,5 +1,6 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT
|
||||
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null -fp-contract=fast | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST
|
||||
; Check latencies of vmul/vfma accumulate chains.
|
||||
|
||||
define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
|
||||
@ -14,7 +15,8 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float
|
||||
; > VMULS read-advanced latency to VMLAS = 0
|
||||
; CHECK-SAME: Latency=0
|
||||
|
||||
; CHECK: VMLAS
|
||||
; CHECK-DEFAULT: VMLAS
|
||||
; CHECK-FAST: VFMAS
|
||||
; > VMLAS common latency = 9
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
@ -22,7 +24,8 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float
|
||||
; > VMLAS read-advanced latency to the next VMLAS = 4
|
||||
; CHECK-SAME: Latency=4
|
||||
|
||||
; CHECK: VMLAS
|
||||
; CHECK-DEFAULT: VMLAS
|
||||
; CHECK-FAST: VFMAS
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
@ -51,7 +54,8 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
|
||||
; VMULfd read-advanced latency to VMLAfd = 0
|
||||
; CHECK-SAME: Latency=0
|
||||
|
||||
; CHECK: VMLAfd
|
||||
; CHECK-DEFAULT: VMLAfd
|
||||
; CHECK-FAST: VFMAfd
|
||||
; > VMLAfd common latency = 9
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
@ -59,7 +63,8 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
|
||||
; > VMLAfd read-advanced latency to the next VMLAfd = 4
|
||||
; CHECK-SAME: Latency=4
|
||||
|
||||
; CHECK: VMLAfd
|
||||
; CHECK-DEFAULT: VMLAfd
|
||||
; CHECK-FAST: VFMAfd
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
@ -75,3 +80,79 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
|
||||
ret <2 x float> %add2
|
||||
}
|
||||
|
||||
define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK: Test3:BB#0
|
||||
|
||||
; CHECK: VMULS
|
||||
; > VMULS common latency = 5
|
||||
; CHECK: Latency : 5
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; > VMULS read-advanced latency to VMLSS = 0
|
||||
; CHECK-SAME: Latency=0
|
||||
|
||||
; CHECK-DEFAULT: VMLSS
|
||||
; CHECK-FAST: VFMSS
|
||||
; > VMLSS common latency = 9
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; > VMLSS read-advanced latency to the next VMLSS = 4
|
||||
; CHECK-SAME: Latency=4
|
||||
|
||||
; CHECK-DEFAULT: VMLSS
|
||||
; CHECK-FAST: VFMSS
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; > VMLSS not-optimized latency to VMOVRS = 9
|
||||
; CHECK-SAME: Latency=9
|
||||
|
||||
; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLSS, VMLSS
|
||||
%mul1 = fmul float %f1, %f2
|
||||
%mul2 = fmul float %f3, %f4
|
||||
%mul3 = fmul float %f5, %f6
|
||||
%sub1 = fsub float %mul1, %mul2
|
||||
%sub2 = fsub float %sub1, %mul3
|
||||
ret float %sub2
|
||||
}
|
||||
|
||||
; ASIMD form
|
||||
define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) {
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK: Test4:BB#0
|
||||
|
||||
; CHECK: VMULfd
|
||||
; > VMULfd common latency = 5
|
||||
; CHECK: Latency : 5
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; VMULfd read-advanced latency to VMLSfd = 0
|
||||
; CHECK-SAME: Latency=0
|
||||
|
||||
; CHECK-DEFAULT: VMLSfd
|
||||
; CHECK-FAST: VFMSfd
|
||||
; > VMLSfd common latency = 9
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; > VMLSfd read-advanced latency to the next VMLSfd = 4
|
||||
; CHECK-SAME: Latency=4
|
||||
|
||||
; CHECK-DEFAULT: VMLSfd
|
||||
; CHECK-FAST: VFMSfd
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; > VMLSfd not-optimized latency to VMOVRRD = 9
|
||||
; CHECK-SAME: Latency=9
|
||||
|
||||
; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLSS, VMLSS
|
||||
%mul1 = fmul <2 x float> %f1, %f2
|
||||
%mul2 = fmul <2 x float> %f3, %f4
|
||||
%mul3 = fmul <2 x float> %f5, %f6
|
||||
%sub1 = fsub <2 x float> %mul1, %mul2
|
||||
%sub2 = fsub <2 x float> %sub1, %mul3
|
||||
ret <2 x float> %sub2
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user