From b01975128c71aeafc3e74c866342d4b60ee0725e Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed.bougacha@gmail.com>
Date: Thu, 10 Sep 2015 01:54:43 +0000
Subject: [PATCH] [AArch64] Match FI+offset in STNP addressing mode.

First, we need to teach isFrameOffsetLegal about STNP.
It already knew about the STP/LDP variants, but those were probably
never exercised, because it's only the load/store optimizer that
generates STP/LDP, and the only user of the method is frame lowering,
which runs earlier.
The STP/LDP cases were wrong: they didn't take into account the fact
that they return two results, not one, so the immediate offset will be
the 4th operand, not the 3rd.

Follow-up to r247234.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247236 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 13 +++++++++++++
 lib/Target/AArch64/AArch64InstrInfo.cpp    | 13 +++++++++++++
 test/CodeGen/AArch64/nontemporal.ll        |  8 ++------
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 367bbc3a482..ada3e852891 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -630,6 +630,15 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
   SDLoc dl(N);
+  const DataLayout &DL = CurDAG->getDataLayout();
+  const TargetLowering *TLI = getTargetLowering();
+  if (N.getOpcode() == ISD::FrameIndex) {
+    int FI = cast<FrameIndexSDNode>(N)->getIndex();
+    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+    return true;
+  }
+
   // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
   // selected here doesn't support labels/immediates, only base+offset.
 
@@ -640,6 +649,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
       if ((RHSC & (Size - 1)) == 0 && RHSC >= (-0x40 << Scale) &&
           RHSC < (0x40 << Scale)) {
         Base = N.getOperand(0);
+        if (Base.getOpcode() == ISD::FrameIndex) {
+          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+        }
         OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
         return true;
       }
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 2013f8beace..a41007cab5c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2260,11 +2260,19 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
   case AArch64::LDPDi:
   case AArch64::STPXi:
   case AArch64::STPDi:
+  case AArch64::LDNPXi:
+  case AArch64::LDNPDi:
+  case AArch64::STNPXi:
+  case AArch64::STNPDi:
+    ImmIdx = 3;
     IsSigned = true;
     Scale = 8;
     break;
   case AArch64::LDPQi:
   case AArch64::STPQi:
+  case AArch64::LDNPQi:
+  case AArch64::STNPQi:
+    ImmIdx = 3;
     IsSigned = true;
     Scale = 16;
     break;
@@ -2272,6 +2280,11 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
   case AArch64::LDPSi:
   case AArch64::STPWi:
   case AArch64::STPSi:
+  case AArch64::LDNPWi:
+  case AArch64::LDNPSi:
+  case AArch64::STNPWi:
+  case AArch64::STNPSi:
+    ImmIdx = 3;
     IsSigned = true;
     Scale = 4;
     break;
diff --git a/test/CodeGen/AArch64/nontemporal.ll b/test/CodeGen/AArch64/nontemporal.ll
index af553be92c5..e1d82ae5777 100644
--- a/test/CodeGen/AArch64/nontemporal.ll
+++ b/test/CodeGen/AArch64/nontemporal.ll
@@ -313,9 +313,7 @@ declare void @dummy(<4 x float>*)
 
 define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 {
 ; CHECK-LABEL: test_stnp_v4f32_offset_alloca:
-; CHECK:       mov x29, sp
-; CHECK:       mov x[[PTR:[0-9]+]], sp
-; CHECK-NEXT:  stnp d0, d{{.*}}, [x[[PTR]]]
+; CHECK:       stnp d0, d{{.*}}, [sp]
 ; CHECK-NEXT:  mov x0, sp
 ; CHECK-NEXT:  bl _dummy
   %tmp0 = alloca <4 x float>
@@ -326,9 +324,7 @@ define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 {
 
 define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 {
 ; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2:
-; CHECK:       mov x29, sp
-; CHECK:       mov x[[PTR:[0-9]+]], sp
-; CHECK-NEXT:  stnp d0, d{{.*}}, [x[[PTR]], #16]
+; CHECK:       stnp d0, d{{.*}}, [sp, #16]
 ; CHECK-NEXT:  mov x0, sp
 ; CHECK-NEXT:  bl _dummy
   %tmp0 = alloca <4 x float>, i32 2