mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-30 07:00:45 +00:00
[AArch64] Add support for NEON scalar floating-point reciprocal estimate,
reciprocal exponent, and reciprocal square root estimate instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192242 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
25180dc319
commit
c976500793
@@ -164,4 +164,7 @@ def int_aarch64_neon_vcvtf32_u32 :
|
||||
Intrinsic<[llvm_v1f32_ty], [llvm_v1i32_ty], [IntrNoMem]>;
|
||||
def int_aarch64_neon_vcvtf64_u64 :
|
||||
Intrinsic<[llvm_v1f64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
|
||||
|
||||
// Scalar Floating-point Reciprocal Exponent
|
||||
def int_aarch64_neon_vrecpx : Neon_1Arg_Intrinsic;
|
||||
}
|
||||
|
@@ -3106,16 +3106,25 @@ multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
|
||||
[], NoItinerary>;
|
||||
}
|
||||
|
||||
multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator Sopnode,
|
||||
SDPatternOperator Dopnode,
|
||||
Instruction INSTS,
|
||||
Instruction INSTD> {
|
||||
multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
|
||||
SDPatternOperator Dopnode,
|
||||
Instruction INSTS,
|
||||
Instruction INSTD> {
|
||||
def : Pat<(v1f32 (Sopnode (v1i32 FPR32:$Rn))),
|
||||
(INSTS FPR32:$Rn)>;
|
||||
def : Pat<(v1f64 (Dopnode (v1i64 FPR64:$Rn))),
|
||||
(INSTD FPR64:$Rn)>;
|
||||
}
|
||||
|
||||
multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
|
||||
Instruction INSTS,
|
||||
Instruction INSTD> {
|
||||
def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))),
|
||||
(INSTS FPR32:$Rn)>;
|
||||
def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
|
||||
(INSTD FPR64:$Rn)>;
|
||||
}
|
||||
|
||||
// Scalar Integer Add
|
||||
let isCommutable = 1 in {
|
||||
def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
|
||||
@@ -3258,15 +3267,30 @@ defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
|
||||
|
||||
// Scalar Signed Integer Convert To Floating-point
|
||||
defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
|
||||
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
|
||||
int_aarch64_neon_vcvtf64_s64,
|
||||
SCVTFss, SCVTFdd>;
|
||||
defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
|
||||
int_aarch64_neon_vcvtf64_s64,
|
||||
SCVTFss, SCVTFdd>;
|
||||
|
||||
// Scalar Unsigned Integer Convert To Floating-point
|
||||
defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
|
||||
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
|
||||
int_aarch64_neon_vcvtf64_u64,
|
||||
UCVTFss, UCVTFdd>;
|
||||
defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
|
||||
int_aarch64_neon_vcvtf64_u64,
|
||||
UCVTFss, UCVTFdd>;
|
||||
|
||||
// Scalar Floating-point Reciprocal Estimate
|
||||
defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
|
||||
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
|
||||
FRECPEss, FRECPEdd>;
|
||||
|
||||
// Scalar Floating-point Reciprocal Exponent
|
||||
defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
|
||||
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
|
||||
FRECPXss, FRECPXdd>;
|
||||
|
||||
// Scalar Floating-point Reciprocal Square Root Estimate
|
||||
defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
|
||||
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
|
||||
FRSQRTEss, FRSQRTEdd>;
|
||||
|
||||
// Scalar Reduce Pairwise
|
||||
|
||||
|
@@ -45,3 +45,72 @@ define double @test_vrsqrtsd_f64(double %a, double %b) {
|
||||
|
||||
declare <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float>, <1 x float>)
|
||||
declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
|
||||
|
||||
define float @test_vrecpes_f32(float %a) {
|
||||
; CHECK: test_vrecpes_f32
|
||||
; CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}}
|
||||
entry:
|
||||
%vrecpe.i = insertelement <1 x float> undef, float %a, i32 0
|
||||
%vrecpe1.i = tail call <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float> %vrecpe.i)
|
||||
%0 = extractelement <1 x float> %vrecpe1.i, i32 0
|
||||
ret float %0
|
||||
}
|
||||
|
||||
define double @test_vrecped_f64(double %a) {
|
||||
; CHECK: test_vrecped_f64
|
||||
; CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}}
|
||||
entry:
|
||||
%vrecpe.i = insertelement <1 x double> undef, double %a, i32 0
|
||||
%vrecpe1.i = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %vrecpe.i)
|
||||
%0 = extractelement <1 x double> %vrecpe1.i, i32 0
|
||||
ret double %0
|
||||
}
|
||||
|
||||
declare <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float>)
|
||||
declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>)
|
||||
|
||||
define float @test_vrecpxs_f32(float %a) {
|
||||
; CHECK: test_vrecpxs_f32
|
||||
; CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}}
|
||||
entry:
|
||||
%vrecpx.i = insertelement <1 x float> undef, float %a, i32 0
|
||||
%vrecpx1.i = tail call <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float> %vrecpx.i)
|
||||
%0 = extractelement <1 x float> %vrecpx1.i, i32 0
|
||||
ret float %0
|
||||
}
|
||||
|
||||
define double @test_vrecpxd_f64(double %a) {
|
||||
; CHECK: test_vrecpxd_f64
|
||||
; CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}}
|
||||
entry:
|
||||
%vrecpx.i = insertelement <1 x double> undef, double %a, i32 0
|
||||
%vrecpx1.i = tail call <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double> %vrecpx.i)
|
||||
%0 = extractelement <1 x double> %vrecpx1.i, i32 0
|
||||
ret double %0
|
||||
}
|
||||
|
||||
declare <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float>)
|
||||
declare <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double>)
|
||||
|
||||
define float @test_vrsqrtes_f32(float %a) {
|
||||
; CHECK: test_vrsqrtes_f32
|
||||
; CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}}
|
||||
entry:
|
||||
%vrsqrte.i = insertelement <1 x float> undef, float %a, i32 0
|
||||
%vrsqrte1.i = tail call <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float> %vrsqrte.i)
|
||||
%0 = extractelement <1 x float> %vrsqrte1.i, i32 0
|
||||
ret float %0
|
||||
}
|
||||
|
||||
define double @test_vrsqrted_f64(double %a) {
|
||||
; CHECK: test_vrsqrted_f64
|
||||
; CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}}
|
||||
entry:
|
||||
%vrsqrte.i = insertelement <1 x double> undef, double %a, i32 0
|
||||
%vrsqrte1.i = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %vrsqrte.i)
|
||||
%0 = extractelement <1 x double> %vrsqrte1.i, i32 0
|
||||
ret double %0
|
||||
}
|
||||
|
||||
declare <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float>)
|
||||
declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
|
||||
|
@@ -213,6 +213,47 @@
|
||||
// CHECK-ERROR: movi v1.16b, #256
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Scalar Floating-point Reciprocal Estimate
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
frecpe s19, h14
|
||||
frecpe d13, s13
|
||||
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: frecpe s19, h14
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: frecpe d13, s13
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Scalar Floating-point Reciprocal Exponent
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
frecpx s18, h10
|
||||
frecpx d16, s19
|
||||
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: frecpx s18, h10
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: frecpx d16, s19
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Scalar Floating-point Reciprocal Square Root Estimate
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
frsqrte s22, h13
|
||||
frsqrte d21, s12
|
||||
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: frsqrte s22, h13
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: frsqrte d21, s12
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Move Immediate - bytemask, per doubleword
|
||||
|
@@ -21,3 +21,33 @@
|
||||
|
||||
// CHECK: frsqrts s21, s5, s12 // encoding: [0xb5,0xfc,0xac,0x5e]
|
||||
// CHECK: frsqrts d8, d22, d18 // encoding: [0xc8,0xfe,0xf2,0x5e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Scalar Floating-point Reciprocal Estimate
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
frecpe s19, s14
|
||||
frecpe d13, d13
|
||||
|
||||
// CHECK: frecpe s19, s14 // encoding: [0xd3,0xd9,0xa1,0x5e]
|
||||
// CHECK: frecpe d13, d13 // encoding: [0xad,0xd9,0xe1,0x5e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Scalar Floating-point Reciprocal Exponent
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
frecpx s18, s10
|
||||
frecpx d16, d19
|
||||
|
||||
// CHECK: frecpx s18, s10 // encoding: [0x52,0xf9,0xa1,0x5e]
|
||||
// CHECK: frecpx d16, d19 // encoding: [0x70,0xfa,0xe1,0x5e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Scalar Floating-point Reciprocal Square Root Estimate
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
frsqrte s22, s13
|
||||
frsqrte d21, d12
|
||||
|
||||
// CHECK: frsqrte s22, s13 // encoding: [0xb6,0xd9,0xa1,0x7e]
|
||||
// CHECK: frsqrte d21, d12 // encoding: [0x95,0xd9,0xe1,0x7e]
|
||||
|
@@ -1508,3 +1508,27 @@
|
||||
# CHECK: ucvtf d21, d14
|
||||
0xb6,0xd9,0x21,0x7e
|
||||
0xd5,0xd9,0x61,0x7e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Scalar Floating-point Reciprocal Estimate
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: frecpe s19, s14
|
||||
# CHECK: frecpe d13, d13
|
||||
0xd3,0xd9,0xa1,0x5e
|
||||
0xad,0xd9,0xe1,0x5e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Scalar Floating-point Reciprocal Exponent
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: frecpx s18, s10
|
||||
# CHECK: frecpx d16, d19
|
||||
0x52,0xf9,0xa1,0x5e
|
||||
0x70,0xfa,0xe1,0x5e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Scalar Floating-point Reciprocal Square Root Estimate
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: frsqrte s22, s13
|
||||
# CHECK: frsqrte d21, d12
|
||||
0xb6,0xd9,0xa1,0x7e
|
||||
0x95,0xd9,0xe1,0x7e
|
||||
|
Loading…
Reference in New Issue
Block a user