ARM64: shuffle patterns around for fmin/fmax & add tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205205 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-20 11:08:27 +00:00 · 2014-03-31 15:46:30 +00:00 · 2014-03-31 15:46:30 +00:00 · 4417e07e39
commit 4417e07e39
parent 576c3f709f
2 changed files with 109 additions and 8 deletions
--- a/lib/Target/ARM64/ARM64InstrInfo.td
+++ b/lib/Target/ARM64/ARM64InstrInfo.td
@ -2857,12 +2857,20 @@ def : Pat<(f32 (int_arm64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
 def : Pat<(f64 (int_arm64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_arm64_neon_fmaxnmv (v2f32 V64:$Rn))),
+          (FMAXNMPv2i32p V64:$Rn)>;
 def : Pat<(f64 (int_arm64_neon_fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_arm64_neon_fmaxv (v2f32 V64:$Rn))),
+          (FMAXPv2i32p V64:$Rn)>;
 def : Pat<(f64 (int_arm64_neon_fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_arm64_neon_fminnmv (v2f32 V64:$Rn))),
+          (FMINNMPv2i32p V64:$Rn)>;
 def : Pat<(f64 (int_arm64_neon_fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_arm64_neon_fminv (v2f32 V64:$Rn))),
+          (FMINPv2i32p V64:$Rn)>;
 def : Pat<(f64 (int_arm64_neon_fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;

@ -3072,17 +3080,9 @@ defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
 defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
 defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
 defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_arm64_neon_fmaxnmv>;
-def : Pat<(f32 (int_arm64_neon_fmaxnmv (v2f32 V64:$Rn))),
-          (EXTRACT_SUBREG (FMAXNMPv2f32 V64:$Rn, V64:$Rn), ssub)>;
 defm FMAXV   : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_arm64_neon_fmaxv>;
-def : Pat<(f32 (int_arm64_neon_fmaxv (v2f32 V64:$Rn))),
-          (EXTRACT_SUBREG (FMAXPv2f32 V64:$Rn, V64:$Rn), ssub)>;
 defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_arm64_neon_fminnmv>;
-def : Pat<(f32 (int_arm64_neon_fminnmv (v2f32 V64:$Rn))),
-          (EXTRACT_SUBREG (FMINNMPv2f32 V64:$Rn, V64:$Rn), ssub)>;
 defm FMINV   : SIMDAcrossLanesS<0b01111, 1, "fminv", int_arm64_neon_fminv>;
-def : Pat<(f32 (int_arm64_neon_fminv (v2f32 V64:$Rn))),
-          (EXTRACT_SUBREG (FMINPv2f32 V64:$Rn, V64:$Rn), ssub)>;

 multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> {
 // If there is a sign extension after this intrinsic, consume it as smov already
--- a/test/CodeGen/ARM64/fminv.ll
+++ b/test/CodeGen/ARM64/fminv.ll
@ -0,0 +1,101 @@
+; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
+
+define float @test_fminv_v2f32(<2 x float> %in) {
+; CHECK: test_fminv_v2f32:
+; CHECK: fminp s0, v0.2s
+  %min = call float @llvm.arm64.neon.fminv.f32.v2f32(<2 x float> %in)
+  ret float %min
+}
+
+define float @test_fminv_v4f32(<4 x float> %in) {
+; CHECK: test_fminv_v4f32:
+; CHECK: fminv s0, v0.4s
+  %min = call float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float> %in)
+  ret float %min
+}
+
+define double @test_fminv_v2f64(<2 x double> %in) {
+; CHECK: test_fminv_v2f64:
+; CHECK: fminp d0, v0.2d
+  %min = call double @llvm.arm64.neon.fminv.f64.v2f64(<2 x double> %in)
+  ret double %min
+}
+
+declare float @llvm.arm64.neon.fminv.f32.v2f32(<2 x float>)
+declare float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float>)
+declare double @llvm.arm64.neon.fminv.f64.v2f64(<2 x double>)
+
+define float @test_fmaxv_v2f32(<2 x float> %in) {
+; CHECK: test_fmaxv_v2f32:
+; CHECK: fmaxp s0, v0.2s
+  %max = call float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float> %in)
+  ret float %max
+}
+
+define float @test_fmaxv_v4f32(<4 x float> %in) {
+; CHECK: test_fmaxv_v4f32:
+; CHECK: fmaxv s0, v0.4s
+  %max = call float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float> %in)
+  ret float %max
+}
+
+define double @test_fmaxv_v2f64(<2 x double> %in) {
+; CHECK: test_fmaxv_v2f64:
+; CHECK: fmaxp d0, v0.2d
+  %max = call double @llvm.arm64.neon.fmaxv.f64.v2f64(<2 x double> %in)
+  ret double %max
+}
+
+declare float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float>)
+declare float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float>)
+declare double @llvm.arm64.neon.fmaxv.f64.v2f64(<2 x double>)
+
+define float @test_fminnmv_v2f32(<2 x float> %in) {
+; CHECK: test_fminnmv_v2f32:
+; CHECK: fminnmp s0, v0.2s
+  %minnm = call float @llvm.arm64.neon.fminnmv.f32.v2f32(<2 x float> %in)
+  ret float %minnm
+}
+
+define float @test_fminnmv_v4f32(<4 x float> %in) {
+; CHECK: test_fminnmv_v4f32:
+; CHECK: fminnmv s0, v0.4s
+  %minnm = call float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float> %in)
+  ret float %minnm
+}
+
+define double @test_fminnmv_v2f64(<2 x double> %in) {
+; CHECK: test_fminnmv_v2f64:
+; CHECK: fminnmp d0, v0.2d
+  %minnm = call double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double> %in)
+  ret double %minnm
+}
+
+declare float @llvm.arm64.neon.fminnmv.f32.v2f32(<2 x float>)
+declare float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float>)
+declare double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double>)
+
+define float @test_fmaxnmv_v2f32(<2 x float> %in) {
+; CHECK: test_fmaxnmv_v2f32:
+; CHECK: fmaxnmp s0, v0.2s
+  %maxnm = call float @llvm.arm64.neon.fmaxnmv.f32.v2f32(<2 x float> %in)
+  ret float %maxnm
+}
+
+define float @test_fmaxnmv_v4f32(<4 x float> %in) {
+; CHECK: test_fmaxnmv_v4f32:
+; CHECK: fmaxnmv s0, v0.4s
+  %maxnm = call float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float> %in)
+  ret float %maxnm
+}
+
+define double @test_fmaxnmv_v2f64(<2 x double> %in) {
+; CHECK: test_fmaxnmv_v2f64:
+; CHECK: fmaxnmp d0, v0.2d
+  %maxnm = call double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double> %in)
+  ret double %maxnm
+}
+
+declare float @llvm.arm64.neon.fmaxnmv.f32.v2f32(<2 x float>)
+declare float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float>)
+declare double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double>)