diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 163f4c5ae50..ef166a26c55 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -825,11 +825,11 @@ public: virtual bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const; - /// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is - /// loading 'Bytes' bytes from a location that is 'Dist' units away from the - /// location that the 'Base' load is loading from. - bool isConsecutiveLoad(SDNode *LD, SDNode *Base, unsigned Bytes, int Dist, - const MachineFrameInfo *MFI) const; + /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a + /// location that is 'Dist' units away from the location that the 'Base' load + /// is loading from. + bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, + int Dist, const MachineFrameInfo *MFI) const; /// PerformDAGCombine - This method will be invoked for all target nodes and /// for any target-independent nodes that the target has registered with diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5d1b2a3ea28..609ec82c5ad 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3626,30 +3626,29 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); - SDNode *LD1 = getBuildPairElt(N, 0); - if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) + LoadSDNode *LD1 = dyn_cast(getBuildPairElt(N, 0)); + LoadSDNode *LD2 = dyn_cast(getBuildPairElt(N, 1)); + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) return SDValue(); MVT LD1VT = LD1->getValueType(0); - SDNode *LD2 = getBuildPairElt(N, 1); const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && // If both are volatile this would reduce the number of volatile loads. // If one is volatile it might be ok, but play conservative and bail out. - !cast(LD1)->isVolatile() && - !cast(LD2)->isVolatile() && + !LD1->isVolatile() && + !LD2->isVolatile() && TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { - LoadSDNode *LD = cast(LD1); - unsigned Align = LD->getAlignment(); + unsigned Align = LD1->getAlignment(); unsigned NewAlign = TLI.getTargetData()-> getABITypeAlignment(VT.getTypeForMVT()); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), - false, Align); + return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), + LD1->getBasePtr(), LD1->getSrcValue(), + LD1->getSrcValueOffset(), false, Align); } return SDValue(); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3334e53f0fb..ab4cd515531 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2070,13 +2070,13 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, } -/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is -/// loading 'Bytes' bytes from a location that is 'Dist' units away from the -/// location that the 'Base' load is loading from. -bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base, - unsigned Bytes, int Dist, +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load +/// is loading from. +bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, + unsigned Bytes, int Dist, const MachineFrameInfo *MFI) const { - if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode()) + if (LD->getChain() != Base->getChain()) return false; MVT VT = LD->getValueType(0); if (VT.getSizeInBits() / 8 != Bytes) @@ -2094,6 +2094,11 @@ bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base, if (FS != BFS || FS != (int)Bytes) return false; return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } + if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) { + ConstantSDNode *V = dyn_cast(Loc.getOperand(1)); + if (V && (V->getSExtValue() == Dist*Bytes)) + return true; + } GlobalValue *GV1 = NULL; GlobalValue *GV2 = NULL; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 924155c4505..77c9f3d02a6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7675,8 +7675,9 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, if (Elt.getOpcode() == ISD::UNDEF) continue; - if (!TLI.isConsecutiveLoad(Elt.getNode(), Base, - EVT.getSizeInBits()/8, i, MFI)) + LoadSDNode *LD = cast(Elt); + LoadSDNode *LDBase = cast(Base); + if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI)) return false; } return true; @@ -7751,44 +7752,82 @@ static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, MVT VT = N->getValueType(0); MVT EVT = VT.getVectorElementType(); - if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit()) - // We are looking for load i64 and zero extend. We want to transform - // it before legalizer has a chance to expand it. Also look for i64 - // BUILD_PAIR bit casted to f64. - return SDValue(); - // This must be an insertion into a zero vector. - SDValue HighElt = N->getOperand(1); - if (!isZeroNode(HighElt)) - return SDValue(); - - // Value must be a load. - SDNode *Base = N->getOperand(0).getNode(); - if (!isa(Base)) { - if (Base->getOpcode() != ISD::BIT_CONVERT) + + // Before or during type legalization, we want to try and convert a + // build_vector of an i64 load and a zero value into vzext_movl before the + // legalizer can break it up. + // FIXME: does the case below remove the need to do this? + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) { + if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit()) return SDValue(); - Base = Base->getOperand(0).getNode(); - if (!isa(Base)) + + // This must be an insertion into a zero vector. + SDValue HighElt = N->getOperand(1); + if (!isZeroNode(HighElt)) return SDValue(); + + // Value must be a load. + SDNode *Base = N->getOperand(0).getNode(); + if (!isa(Base)) { + if (Base->getOpcode() != ISD::BIT_CONVERT) + return SDValue(); + Base = Base->getOperand(0).getNode(); + if (!isa(Base)) + return SDValue(); + } + + // Transform it into VZEXT_LOAD addr. + LoadSDNode *LD = cast(Base); + + // Load must not be an extload. + if (LD->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + + // Load type should legal type so we don't have to legalize it. + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; + SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); + TargetLowering::TargetLoweringOpt TLO(DAG); + TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1)); + DCI.CommitTargetLoweringOpt(TLO); + return ResNode; } - // Transform it into VZEXT_LOAD addr. - LoadSDNode *LD = cast(Base); + // The type legalizer will have broken apart v2i64 build_vector created during + // widening before the code which handles that case is run. Look for build + // vector (load, load + 4, 0/undef, 0/undef) + if (VT == MVT::v4i32 || VT == MVT::v4f32) { + LoadSDNode *LD0 = dyn_cast(N->getOperand(0)); + LoadSDNode *LD1 = dyn_cast(N->getOperand(1)); + if (!LD0 || !LD1) + return SDValue(); + if (LD0->getExtensionType() != ISD::NON_EXTLOAD || + LD1->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + // Make sure the second elt is a consecutive load. + if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1, + DAG.getMachineFunction().getFrameInfo())) + return SDValue(); - // Load must not be an extload. - if (LD->getExtensionType() != ISD::NON_EXTLOAD) - return SDValue(); - - // Load type should legal type so we don't have to legalize it. - if (!TLI.isTypeLegal(VT)) - return SDValue(); - - SDVTList Tys = DAG.getVTList(VT, MVT::Other); - SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; - SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); - TargetLowering::TargetLoweringOpt TLO(DAG); - TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1)); - DCI.CommitTargetLoweringOpt(TLO); - return ResNode; + SDValue N2 = N->getOperand(2); + SDValue N3 = N->getOperand(3); + if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF) + return SDValue(); + if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF) + return SDValue(); + + SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); + SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() }; + SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); + TargetLowering::TargetLoweringOpt TLO(DAG); + TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1)); + DCI.CommitTargetLoweringOpt(TLO); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode); + } + return SDValue(); } /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll index c89a296d0db..b96fdfc03c6 100644 --- a/test/CodeGen/X86/dagcombine-buildvector.ll +++ b/test/CodeGen/X86/dagcombine-buildvector.ll @@ -1,13 +1,25 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f +; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn -disable-mmx -o %t -f ; RUN: grep unpcklpd %t | count 1 ; RUN: grep movapd %t | count 1 +; RUN: grep movaps %t | count 1 ; Shows a dag combine bug that will generate an illegal build vector ; with v2i64 build_vector i32, i32. -define void @test(<2 x double>* %dst, <4 x double> %src) { +define void @test(<2 x double>* %dst, <4 x double> %src) nounwind { entry: %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 > store <2 x double> %tmp7.i, <2 x double>* %dst ret void } + +define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind { +entry: + %tmp1 = load <4 x i16>* %src + %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> + %0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3) + store <4 x i32> %0, <4 x i32>* %dest + ret void +} + +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone