diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 6e6fedeb2e4..389f2ea5ff1 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1367,9 +1367,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt), + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), KnownZero, KnownOne, TLO, Depth+1)) return true; + + // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits + // are not demanded. This will likely allow the anyext to be folded away. + if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { + SDValue InnerOp = InOp.getNode()->getOperand(0); + EVT InnerVT = InnerOp.getValueType(); + if ((APInt::getHighBitsSet(BitWidth, + BitWidth - InnerVT.getSizeInBits()) & + DemandedMask) == 0 && + isTypeDesirableForOp(ISD::SHL, InnerVT)) { + SDValue NarrowShl = + TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, + TLO.DAG.getConstant(ShAmt, InnerVT)); + return + TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), + NarrowShl)); + } + } + KnownZero <<= SA->getZExtValue(); KnownOne <<= SA->getZExtValue(); // low bits known zero. diff --git a/test/CodeGen/X86/shl-anyext.ll b/test/CodeGen/X86/shl-anyext.ll new file mode 100644 index 00000000000..6dc95317d14 --- /dev/null +++ b/test/CodeGen/X86/shl-anyext.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; Codegen should be able to use a 32-bit shift instead of a 64-bit shift. +; CHECK: shll $16 + +define fastcc void @test(i32 %level, i64 %a, i64 %b, i64 %c, i64 %d, i32* %p) nounwind { +if.end523: ; preds = %if.end453 + %conv7981749 = zext i32 %level to i64 ; [#uses=1] + %and799 = shl i64 %conv7981749, 16 ; [#uses=1] + %shl800 = and i64 %and799, 16711680 ; [#uses=1] + %or801 = or i64 %shl800, %a ; [#uses=1] + %or806 = or i64 %or801, %b ; [#uses=1] + %or811 = or i64 %or806, %c ; [#uses=1] + %or819 = or i64 %or811, %d ; [#uses=1] + %conv820 = trunc i64 %or819 to i32 ; [#uses=1] + store i32 %conv820, i32* %p + ret void +}