Shifter ops are not always free. Do not fold them (especially to form

complex load / store addressing mode) when they have higher cost and when they have more than one use. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117509 91177308-0d34-0410-b5e6-96231b3b80d8
2025-01-21 01:06:46 +00:00 · 2010-10-27 23:41:30 +00:00 · 2010-10-27 23:41:30 +00:00 · f40deed62f
commit f40deed62f
parent de5fa932b9
4 changed files with 183 additions and 25 deletions
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@ -78,8 +78,12 @@ public:

  SDNode *Select(SDNode *N);

+  bool isShifterOpProfitable(const SDValue &Shift,
+                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectShifterOperandReg(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C);
+  bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
+                                    SDValue &B, SDValue &C);
  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

@ -246,6 +250,17 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 }


+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+                                            ARM_AM::ShiftOpc ShOpcVal,
+                                            unsigned ShAmt) {
+  if (!Subtarget->isCortexA9())
+    return true;
+  if (Shift.hasOneUse())
+    return true;
+  // R << 2 is free.
+  return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+}
+
 bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
@ -261,6 +276,32 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    ShReg = CurDAG->getRegister(0, MVT::i32);
+    ShImmVal = RHS->getZExtValue() & 31;
+  } else {
+    ShReg = N.getOperand(1);
+    if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+      return false;
+  }
+  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+                                  MVT::i32);
+  return true;
+}
+
+bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
+                                                   SDValue &BaseReg,
+                                                   SDValue &ShReg,
+                                                   SDValue &Opc) {
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+  // Don't match base register only case. That is matched to a separate
+  // lower complexity pattern with explicit register operand.
+  if (ShOpcVal == ARM_AM::no_shift) return false;
+
+  BaseReg = N.getOperand(0);
+  unsigned ShImmVal = 0;
+  // Do not check isShifterOpProfitable. This must return true.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    ShReg = CurDAG->getRegister(0, MVT::i32);
    ShImmVal = RHS->getZExtValue() & 31;
@ -321,7 +362,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,

 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
-  if (N.getOpcode() == ISD::MUL) {
+  if (N.getOpcode() == ISD::MUL &&
+      (!Subtarget->isCortexA9() || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
@ -357,6 +399,10 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
    }
  }

+  if (Subtarget->isCortexA9() && !N.hasOneUse())
+    // Compute R +/- (R << N) and reuse it.
+    return false;
+
  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
@ -371,14 +417,20 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
-      Offset = N.getOperand(1).getOperand(0);
+      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+        Offset = N.getOperand(1).getOperand(0);
+      else {
+        ShAmt = 0;
+        ShOpcVal = ARM_AM::no_shift;
+      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
-  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
+      !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
@ -386,8 +438,15 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
-        Offset = N.getOperand(0).getOperand(0);
-        Base = N.getOperand(1);
+        if (!Subtarget->isCortexA9() ||
+            (N.hasOneUse() &&
+             isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+          Offset = N.getOperand(0).getOperand(0);
+          Base = N.getOperand(1);
+        } else {
+          ShAmt = 0;
+          ShOpcVal = ARM_AM::no_shift;
+        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
@ -408,7 +467,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
                                                     SDValue &Base,
                                                     SDValue &Offset,
                                                     SDValue &Opc) {
-  if (N.getOpcode() == ISD::MUL) {
+  if (N.getOpcode() == ISD::MUL &&
+      (!Subtarget->isCortexA9() || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
@ -474,6 +534,16 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
    }
  }

+  if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+    // Compute R +/- (R << N) and reuse it.
+    Base = N;
+    Offset = CurDAG->getRegister(0, MVT::i32);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+                                                      ARM_AM::no_shift),
+                                    MVT::i32);
+    return AM2_BASE;
+  }
+
  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
@ -488,14 +558,20 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
-      Offset = N.getOperand(1).getOperand(0);
+      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+        Offset = N.getOperand(1).getOperand(0);
+      else {
+        ShAmt = 0;
+        ShOpcVal = ARM_AM::no_shift;
+      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
-  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
+      !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
@ -503,8 +579,15 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
-        Offset = N.getOperand(0).getOperand(0);
-        Base = N.getOperand(1);
+        if (!Subtarget->isCortexA9() ||
+            (N.hasOneUse() &&
+             isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+          Offset = N.getOperand(0).getOperand(0);
+          Base = N.getOperand(1);
+        } else {
+          ShAmt = 0;
+          ShOpcVal = ARM_AM::no_shift;
+        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
@ -543,7 +626,12 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
-      Offset = N.getOperand(0);
+      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+        Offset = N.getOperand(0);
+      else {
+        ShAmt = 0;
+        ShOpcVal = ARM_AM::no_shift;
+      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
@ -959,6 +1047,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
      return false;
  }

+  if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+    // Compute R + (R << [1,2,3]) and reuse it.
+    Base = N;
+    return false;
+  }
+
  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
@ -977,11 +1071,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
-      if (ShAmt >= 4) {
+      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
+        OffReg = OffReg.getOperand(0);
+      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
-      } else
-        OffReg = OffReg.getOperand(0);
+      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@ -325,6 +325,13 @@ def so_reg : Operand<i32>,    // reg reg imm
  let PrintMethod = "printSORegOperand";
  let MIOperandInfo = (ops GPR, GPR, i32imm);
 }
+def shift_so_reg : Operand<i32>,    // reg reg imm
+                   ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
+                                  [shl,srl,sra,rotr]> {
+  string EncoderMethod = "getSORegOpValue";
+  let PrintMethod = "printSORegOperand";
+  let MIOperandInfo = (ops GPR, GPR, i32imm);
+}

 // so_imm - Match a 32-bit shifter_operand immediate operand, which is an
 // 8-bit immediate rotated by an arbitrary number of bits.  so_imm values are
@ -1715,9 +1722,10 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
  let Inst{15-12} = Rd;
 }

-def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins so_reg:$src),
+def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
                DPSoRegFrm, IIC_iMOVsr,
-                "mov", "\t$Rd, $src", [(set GPR:$Rd, so_reg:$src)]>, UnaryDP {
+                "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
+                UnaryDP {
  bits<4> Rd;
  bits<12> src;
  let Inst{15-12} = Rd;
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@ -1,18 +1,72 @@
-; RUN: llc < %s -march=arm | grep add | grep lsl
-; RUN: llc < %s -march=arm | grep bic | grep asr
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+; rdar://8576755


 define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
-        %shift.upgrd.1 = zext i8 %sh to i32             ; <i32> [#uses=1]
-        %A = shl i32 %Y, %shift.upgrd.1         ; <i32> [#uses=1]
-        %B = add i32 %X, %A             ; <i32> [#uses=1]
+; A8: test1:
+; A8: add r0, r0, r1, lsl r2
+
+; A9: test1:
+; A9: add r0, r0, r1, lsl r2
+        %shift.upgrd.1 = zext i8 %sh to i32
+        %A = shl i32 %Y, %shift.upgrd.1
+        %B = add i32 %X, %A
        ret i32 %B
 }

 define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
-        %shift.upgrd.2 = zext i8 %sh to i32             ; <i32> [#uses=1]
-        %A = ashr i32 %Y, %shift.upgrd.2                ; <i32> [#uses=1]
-        %B = xor i32 %A, -1             ; <i32> [#uses=1]
-        %C = and i32 %X, %B             ; <i32> [#uses=1]
+; A8: test2:
+; A8: bic r0, r0, r1, asr r2
+
+; A9: test2:
+; A9: bic r0, r0, r1, asr r2
+        %shift.upgrd.2 = zext i8 %sh to i32
+        %A = ashr i32 %Y, %shift.upgrd.2
+        %B = xor i32 %A, -1
+        %C = and i32 %X, %B
        ret i32 %C
 }
+
+define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
+entry:
+; A8: test3:
+; A8: ldr r0, [r0, r2, lsl #2]
+; A8: ldr r1, [r1, r2, lsl #2]
+
+; lsl #2 is free
+; A9: test3:
+; A9: ldr r1, [r1, r2, lsl #2]
+; A9: ldr r0, [r0, r2, lsl #2]
+        %tmp1 = shl i32 %offset, 2
+        %tmp2 = add i32 %base, %tmp1
+        %tmp3 = inttoptr i32 %tmp2 to i32*
+        %tmp4 = add i32 %base2, %tmp1
+        %tmp5 = inttoptr i32 %tmp4 to i32*
+        %tmp6 = load i32* %tmp3
+        %tmp7 = load i32* %tmp5
+        %tmp8 = add i32 %tmp7, %tmp6
+        ret i32 %tmp8
+}
+
+declare i8* @malloc(...)
+
+define fastcc void @test4() nounwind {
+entry:
+; A8: test4:
+; A8: ldr r1, [r0, r0, lsl #2]
+; A8: str r1, [r0, r0, lsl #2]
+
+; A9: test4:
+; A9: add r0, r0, r0, lsl #2
+; A9: ldr r1, [r0]
+; A9: str r1, [r0]
+  %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
+  %1 = bitcast i8* %0 to i32*
+  %2 = sext i16 undef to i32
+  %3 = getelementptr inbounds i32* %1, i32 %2
+  %4 = load i32* %3, align 4
+  %5 = add nsw i32 %4, 1
+  store i32 %5, i32* %3, align 4
+  ret void
+}
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@ -586,6 +586,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,

  MISC("brtarget", "kOperandTypeARMBranchTarget");                // ?
  MISC("so_reg", "kOperandTypeARMSoReg");                         // R, R, I
+  MISC("shift_so_reg", "kOperandTypeARMSoReg");                   // R, R, I
  MISC("t2_so_reg", "kOperandTypeThumb2SoReg");                   // R, I
  MISC("so_imm", "kOperandTypeARMSoImm");                         // I
  MISC("rot_imm", "kOperandTypeARMRotImm");                       // I