diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index cfff0019b8d..69a229e32f4 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -98,15 +98,14 @@ private: /// This reordering exposes to optimizeMemoryInstruction more /// optimization opportunities on loads and stores. /// - /// Returns true if this function succesfully hoists an eliminable - /// addrspacecast or V is already such an addrspacecast. - /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X, - /// indices)". - bool hoistAddrSpaceCastFrom(Value *V, int Depth = 0); + /// If this function successfully hoists an eliminable addrspacecast or V is + /// already such an addrspacecast, it returns the transformed value (which is + /// guaranteed to be an addrspacecast); otherwise, it returns nullptr. + Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0); /// Helper function for GEPs. - bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth); + Value *hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth); /// Helper function for bitcasts. - bool hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth); + Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth); }; } @@ -143,17 +142,19 @@ static bool isEliminableAddrSpaceCast(Value *V) { DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC); } -bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(GEPOperator *GEP, - int Depth) { - if (!hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1)) - return false; +Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( + GEPOperator *GEP, int Depth) { + Value *NewOperand = + hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1); + if (NewOperand == nullptr) + return nullptr; - // That hoistAddrSpaceCastFrom succeeds implies GEP's pointer operand is now - // an eliminable addrspacecast. 
- assert(isEliminableAddrSpaceCast(GEP->getPointerOperand())); - Operator *Cast = cast(GEP->getPointerOperand()); + // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr. + assert(isEliminableAddrSpaceCast(NewOperand)); + Operator *Cast = cast(NewOperand); SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); + Value *NewASC; if (Instruction *GEPI = dyn_cast(GEP)) { // GEP = gep (addrspacecast X), indices // => @@ -163,30 +164,31 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(GEPOperator *GEP, GEP->getSourceElementType(), Cast->getOperand(0), Indices, "", GEPI); NewGEP->setIsInBounds(GEP->isInBounds()); - Value *NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI); + NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI); NewASC->takeName(GEP); + // Without RAUWing GEP, the compiler would visit GEP again and emit + // redundant instructions. This is exercised in test @rauw in + // access-non-generic.ll. GEP->replaceAllUsesWith(NewASC); } else { // GEP is a constant expression. Constant *NewGEP = ConstantExpr::getGetElementPtr( GEP->getSourceElementType(), cast(Cast->getOperand(0)), Indices, GEP->isInBounds()); - GEP->replaceAllUsesWith( - ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType())); + NewASC = ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType()); } - - return true; + return NewASC; } -bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast( +Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast( BitCastOperator *BC, int Depth) { - if (!hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1)) - return false; + Value *NewOperand = hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1); + if (NewOperand == nullptr) + return nullptr; - // That hoistAddrSpaceCastFrom succeeds implies BC's source operand is now - // an eliminable addrspacecast. 
- assert(isEliminableAddrSpaceCast(BC->getOperand(0))); - Operator *Cast = cast(BC->getOperand(0)); + // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr. + assert(isEliminableAddrSpaceCast(NewOperand)); + Operator *Cast = cast(NewOperand); // Cast = addrspacecast Src // BC = bitcast Cast @@ -197,31 +199,34 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast( Type *TypeOfNewCast = PointerType::get(BC->getType()->getPointerElementType(), Src->getType()->getPointerAddressSpace()); + Value *NewBC; if (BitCastInst *BCI = dyn_cast(BC)) { Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI); - Value *NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI); + NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI); NewBC->takeName(BC); + // Without RAUWing BC, the compiler would visit BC again and emit + // redundant instructions. This is exercised in test @rauw in + // access-non-generic.ll. BC->replaceAllUsesWith(NewBC); } else { // BC is a constant expression. Constant *NewCast = ConstantExpr::getBitCast(cast(Src), TypeOfNewCast); - Constant *NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType()); - BC->replaceAllUsesWith(NewBC); + NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType()); } - return true; + return NewBC; } -bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V, - int Depth) { - // Returns true if V is already an eliminable addrspacecast. +Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V, + int Depth) { + // Returns V if V is already an eliminable addrspacecast. if (isEliminableAddrSpaceCast(V)) - return true; + return V; // Limit the depth to prevent this recursive function from running too long. const int MaxDepth = 20; if (Depth >= MaxDepth) - return false; + return nullptr; // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer // operand. 
This enables optimizeMemoryInstruction to shortcut addrspacecasts @@ -232,28 +237,29 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V, if (BitCastOperator *BC = dyn_cast(V)) return hoistAddrSpaceCastFromBitCast(BC, Depth); - return false; + return nullptr; } bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, unsigned Idx) { - if (hoistAddrSpaceCastFrom(MI->getOperand(Idx))) { - // load/store (addrspacecast X) => load/store X if shortcutting the - // addrspacecast is valid and can improve performance. - // - // e.g., - // %1 = addrspacecast float addrspace(3)* %0 to float* - // %2 = load float* %1 - // -> - // %2 = load float addrspace(3)* %0 - // - // Note: the addrspacecast can also be a constant expression. - assert(isEliminableAddrSpaceCast(MI->getOperand(Idx))); - Operator *ASC = dyn_cast(MI->getOperand(Idx)); - MI->setOperand(Idx, ASC->getOperand(0)); - return true; - } - return false; + Value *NewOperand = hoistAddrSpaceCastFrom(MI->getOperand(Idx)); + if (NewOperand == nullptr) + return false; + + // load/store (addrspacecast X) => load/store X if shortcutting the + // addrspacecast is valid and can improve performance. + // + // e.g., + // %1 = addrspacecast float addrspace(3)* %0 to float* + // %2 = load float* %1 + // -> + // %2 = load float addrspace(3)* %0 + // + // Note: the addrspacecast can also be a constant expression. 
+ assert(isEliminableAddrSpaceCast(NewOperand)); + Operator *ASC = dyn_cast(NewOperand); + MI->setOperand(Idx, ASC->getOperand(0)); + return true; } bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { diff --git a/test/CodeGen/NVPTX/access-non-generic.ll b/test/CodeGen/NVPTX/access-non-generic.ll index 5deefe881e3..c1327274a9c 100644 --- a/test/CodeGen/NVPTX/access-non-generic.ll +++ b/test/CodeGen/NVPTX/access-non-generic.ll @@ -101,6 +101,28 @@ define i32 @ld_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) ret i32 %5 } +define void @nested_const_expr() { +; PTX-LABEL: nested_const_expr( + ; store 1 to bitcast(gep(addrspacecast(array), 0, 1)) + store i32 1, i32* bitcast (float* getelementptr ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i64 0, i64 1) to i32*), align 4 +; PTX: mov.u32 %r1, 1; +; PTX-NEXT: st.shared.u32 [array+4], %r1; + ret void +} + +define void @rauw(float addrspace(1)* %input) { + %generic_input = addrspacecast float addrspace(1)* %input to float* + %addr = getelementptr float, float* %generic_input, i64 10 + %v = load float, float* %addr + store float %v, float* %addr + ret void +; IR-LABEL: @rauw( +; IR-NEXT: %1 = getelementptr float, float addrspace(1)* %input, i64 10 +; IR-NEXT: %v = load float, float addrspace(1)* %1 +; IR-NEXT: store float %v, float addrspace(1)* %1 +; IR-NEXT: ret void +} + declare void @llvm.cuda.syncthreads() #3 attributes #3 = { noduplicate nounwind }