diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index a6cc44d4387..dd355012cb9 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -156,6 +156,7 @@ void initializeIfConverterPass(PassRegistry&);
 void initializeImplicitNullChecksPass(PassRegistry&);
 void initializeIndVarSimplifyLegacyPassPass(PassRegistry&);
 void initializeInductiveRangeCheckEliminationPass(PassRegistry&);
+void initializeInferAddressSpacesPass(PassRegistry&);
 void initializeInferFunctionAttrsLegacyPassPass(PassRegistry&);
 void initializeInlineCostAnalysisPass(PassRegistry&);
 void initializeInstCountPass(PassRegistry&);
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index d4215412b14..b62b05574b5 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -410,6 +410,15 @@ Pass *createLowerGuardIntrinsicPass();
 //
 Pass *createCorrelatedValuePropagationPass();
 
+//===----------------------------------------------------------------------===//
+//
+// InferAddressSpaces - Modify users of addrspacecast instructions with values
+// in the source address space if using the destination address space is slower
+// on the target.
+//
+FunctionPass *createInferAddressSpacesPass();
+extern char &InferAddressSpacesID;
+
 //===----------------------------------------------------------------------===//
 //
 // InstructionSimplifier - Remove redundant instructions.
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 399ff1fd96e..a8eecfcc138 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -17,7 +17,6 @@ set(NVPTXCodeGen_sources
   NVPTXISelDAGToDAG.cpp
   NVPTXISelLowering.cpp
   NVPTXImageOptimizer.cpp
-  NVPTXInferAddressSpaces.cpp
   NVPTXInstrInfo.cpp
   NVPTXLowerAggrCopies.cpp
   NVPTXLowerArgs.cpp
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 24b6c0879f1..902d1b25e7d 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -45,7 +45,6 @@ FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
                                  llvm::CodeGenOpt::Level OptLevel);
 ModulePass *createNVPTXAssignValidGlobalNamesPass();
 ModulePass *createGenericToNVVMPass();
-FunctionPass *createNVPTXInferAddressSpacesPass();
 FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
 FunctionPass *createNVVMReflectPass();
 MachineFunctionPass *createNVPTXPrologEpilogPass();
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a8b88a74833..ab5298d0dcf 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -51,7 +51,6 @@ void initializeNVVMReflectPass(PassRegistry&);
 void initializeGenericToNVVMPass(PassRegistry&);
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
-void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
 void initializeNVPTXLowerArgsPass(PassRegistry &);
 void initializeNVPTXLowerAllocaPass(PassRegistry &);
@@ -71,7 +70,6 @@ extern "C" void LLVMInitializeNVPTXTarget() {
   initializeGenericToNVVMPass(PR);
   initializeNVPTXAllocaHoistingPass(PR);
   initializeNVPTXAssignValidGlobalNamesPass(PR);
-  initializeNVPTXInferAddressSpacesPass(PR);
   initializeNVPTXLowerArgsPass(PR);
   initializeNVPTXLowerAllocaPass(PR);
   initializeNVPTXLowerAggrCopiesPass(PR);
@@ -195,7 +193,7 @@ void NVPTXPassConfig::addAddressSpaceInferencePasses() {
   // be eliminated by SROA.
   addPass(createSROAPass());
   addPass(createNVPTXLowerAllocaPass());
-  addPass(createNVPTXInferAddressSpacesPass());
+  addPass(createInferAddressSpacesPass());
 }
 
 void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index b2579a813e6..b323ab3bd44 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMScalarOpts
   IVUsersPrinter.cpp
   InductiveRangeCheckElimination.cpp
   IndVarSimplify.cpp
+  InferAddressSpaces.cpp
   JumpThreading.cpp
   LICM.cpp
   LoopAccessAnalysisPrinter.cpp
diff --git a/lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
similarity index 87%
rename from lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp
rename to lib/Transforms/Scalar/InferAddressSpaces.cpp
index 64a59ba2713..8ca35c0061f 100644
--- a/lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -89,7 +89,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "NVPTX.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
@@ -103,7 +103,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 
-#define DEBUG_TYPE "nvptx-infer-addrspace"
+#define DEBUG_TYPE "infer-address-spaces"
 
 using namespace llvm;
 
@@ -112,8 +112,8 @@ static const unsigned UnknownAddressSpace = ~0u;
 
 using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
 
-/// \brief NVPTXInferAddressSpaces
-class NVPTXInferAddressSpaces: public FunctionPass {
+/// \brief InferAddressSpaces
+class InferAddressSpaces: public FunctionPass {
   /// Target specific address space which uses of should be replaced if
   /// possible.
   unsigned FlatAddrSpace;
@@ -121,7 +121,7 @@ class NVPTXInferAddressSpaces: public FunctionPass {
 public:
   static char ID;
 
-  NVPTXInferAddressSpaces() : FunctionPass(ID) {}
+  InferAddressSpaces() : FunctionPass(ID) {}
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
@@ -162,13 +162,13 @@ private:
 };
 } // end anonymous namespace
 
-char NVPTXInferAddressSpaces::ID = 0;
+char InferAddressSpaces::ID = 0;
 
 namespace llvm {
-void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
+void initializeInferAddressSpacesPass(PassRegistry &);
 }
-INITIALIZE_PASS(NVPTXInferAddressSpaces, "nvptx-infer-addrspace",
-                "Infer address spaces",
+
+INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                 false, false)
 
 // Returns true if V is an address expression.
@@ -212,9 +212,9 @@ static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
 
 // If V is an unvisited flat address expression, appends V to PostorderStack
 // and marks it as visited.
-void NVPTXInferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
-  Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
-  DenseSet<Value *> *Visited) const {
+void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
+    Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
+    DenseSet<Value *> *Visited) const {
   assert(V->getType()->isPointerTy());
   if (isAddressExpression(*V) &&
       V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
@@ -226,7 +226,7 @@ void NVPTXInferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
 // Returns all flat address expressions in function F. The elements are ordered
 // in postorder.
 std::vector<Value *>
-NVPTXInferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
+InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
   // This function implements a non-recursive postorder traversal of a partial
   // use-def graph of function F.
   std::vector<std::pair<Value *, bool>> PostorderStack;
@@ -237,10 +237,10 @@ NVPTXInferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
   for (Instruction &I : instructions(F)) {
     if (isa<LoadInst>(I)) {
       appendsFlatAddressExpressionToPostorderStack(
-        I.getOperand(0), &PostorderStack, &Visited);
+          I.getOperand(0), &PostorderStack, &Visited);
     } else if (isa<StoreInst>(I)) {
       appendsFlatAddressExpressionToPostorderStack(
-        I.getOperand(1), &PostorderStack, &Visited);
+          I.getOperand(1), &PostorderStack, &Visited);
     }
   }
 
@@ -257,7 +257,7 @@ NVPTXInferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
     PostorderStack.back().second = true;
     for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
       appendsFlatAddressExpressionToPostorderStack(
-        PtrOperand, &PostorderStack, &Visited);
+          PtrOperand, &PostorderStack, &Visited);
     }
   }
   return Postorder;
 }
@@ -267,16 +267,16 @@ NVPTXInferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
 // of OperandUse.get() in the new address space. If the clone is not ready yet,
 // returns an undef in the new address space as a placeholder.
 static Value *operandWithNewAddressSpaceOrCreateUndef(
-  const Use &OperandUse, unsigned NewAddrSpace,
-  const ValueToValueMapTy &ValueWithNewAddrSpace,
-  SmallVectorImpl<const Use *> *UndefUsesToFix) {
+    const Use &OperandUse, unsigned NewAddrSpace,
+    const ValueToValueMapTy &ValueWithNewAddrSpace,
+    SmallVectorImpl<const Use *> *UndefUsesToFix) {
   Value *Operand = OperandUse.get();
   if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
     return NewOperand;
 
   UndefUsesToFix->push_back(&OperandUse);
   return UndefValue::get(
-    Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace));
+      Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace));
 }
 
 // Returns a clone of `I` with its operands converted to those specified in
@@ -289,11 +289,11 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
 // from a pointer whose type already matches. Therefore, this function returns a
 // Value* instead of an Instruction*.
 static Value *cloneInstructionWithNewAddressSpace(
-  Instruction *I, unsigned NewAddrSpace,
-  const ValueToValueMapTy &ValueWithNewAddrSpace,
-  SmallVectorImpl<const Use *> *UndefUsesToFix) {
+    Instruction *I, unsigned NewAddrSpace,
+    const ValueToValueMapTy &ValueWithNewAddrSpace,
+    SmallVectorImpl<const Use *> *UndefUsesToFix) {
   Type *NewPtrType =
-    I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
+      I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
 
   if (I->getOpcode() == Instruction::AddrSpaceCast) {
     Value *Src = I->getOperand(0);
@@ -313,7 +313,7 @@ static Value *cloneInstructionWithNewAddressSpace(
       NewPointerOperands.push_back(nullptr);
     else
       NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
-        OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
+          OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
   }
 
   switch (I->getOpcode()) {
@@ -333,8 +333,8 @@
   case Instruction::GetElementPtr: {
     GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
     GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
-      GEP->getSourceElementType(), NewPointerOperands[0],
-      SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end()));
+        GEP->getSourceElementType(), NewPointerOperands[0],
+        SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end()));
     NewGEP->setIsInBounds(GEP->isInBounds());
     return NewGEP;
   }
@@ -347,10 +347,10 @@
 // constant expression `CE` with its operands replaced as specified in
 // ValueWithNewAddrSpace.
 static Value *cloneConstantExprWithNewAddressSpace(
-  ConstantExpr *CE, unsigned NewAddrSpace,
-  const ValueToValueMapTy &ValueWithNewAddrSpace) {
+    ConstantExpr *CE, unsigned NewAddrSpace,
+    const ValueToValueMapTy &ValueWithNewAddrSpace) {
   Type *TargetType =
-    CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
+      CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
 
   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
     // Because CE is flat, the source address space must be specific.
@@ -382,8 +382,8 @@
     // Needs to specify the source type while constructing a getelementptr
     // constant expression.
     return CE->getWithOperands(
-      NewOperands, TargetType, /*OnlyIfReduced=*/false,
-      NewOperands[0]->getType()->getPointerElementType());
+        NewOperands, TargetType, /*OnlyIfReduced=*/false,
+        NewOperands[0]->getType()->getPointerElementType());
   }
 
   return CE->getWithOperands(NewOperands, TargetType);
@@ -394,7 +394,7 @@
 // expression whose address space needs to be modified, in postorder.
 //
 // See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
-Value *NVPTXInferAddressSpaces::cloneValueWithNewAddressSpace(
+Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
     Value *V, unsigned NewAddrSpace,
     const ValueToValueMapTy &ValueWithNewAddrSpace,
     SmallVectorImpl<const Use *> *UndefUsesToFix) const {
@@ -404,7 +404,7 @@ Value *NVPTXInferAddressSpaces::cloneValueWithNewAddressSpace(
 
   if (Instruction *I = dyn_cast<Instruction>(V)) {
     Value *NewV = cloneInstructionWithNewAddressSpace(
-      I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
+        I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
     if (Instruction *NewI = dyn_cast<Instruction>(NewV)) {
       if (NewI->getParent() == nullptr) {
         NewI->insertBefore(I);
@@ -415,13 +415,13 @@ Value *NVPTXInferAddressSpaces::cloneValueWithNewAddressSpace(
   }
 
   return cloneConstantExprWithNewAddressSpace(
-    cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
+      cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
 }
 
 // Defines the join operation on the address space lattice (see the file header
 // comments).
-unsigned NVPTXInferAddressSpaces::joinAddressSpaces(unsigned AS1,
-                                                    unsigned AS2) const {
+unsigned InferAddressSpaces::joinAddressSpaces(unsigned AS1,
+                                               unsigned AS2) const {
   if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
     return FlatAddrSpace;
 
@@ -434,7 +434,7 @@ unsigned NVPTXInferAddressSpaces::joinAddressSpaces(unsigned AS1,
   return (AS1 == AS2) ? AS1 : FlatAddrSpace;
 }
 
-bool NVPTXInferAddressSpaces::runOnFunction(Function &F) {
+bool InferAddressSpaces::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;
 
@@ -456,9 +456,9 @@ bool NVPTXInferAddressSpaces::runOnFunction(Function &F) {
   return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
 }
 
-void NVPTXInferAddressSpaces::inferAddressSpaces(
-  const std::vector<Value *> &Postorder,
-  ValueToAddrSpaceMapTy *InferredAddrSpace) const {
+void InferAddressSpaces::inferAddressSpaces(
+    const std::vector<Value *> &Postorder,
+    ValueToAddrSpaceMapTy *InferredAddrSpace) const {
   SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
   // Initially, all expressions are in the uninitialized address space.
   for (Value *V : Postorder)
@@ -490,8 +490,8 @@ void NVPTXInferAddressSpaces::inferAddressSpaces(
         continue;
 
       // Function updateAddressSpace moves the address space down a lattice
-      // path. Therefore, nothing to do if User is already inferred as flat
-      // (the bottom element in the lattice).
+      // path. Therefore, nothing to do if User is already inferred as flat (the
+      // bottom element in the lattice).
       if (Pos->second == FlatAddrSpace)
         continue;
 
@@ -500,8 +500,8 @@ void NVPTXInferAddressSpaces::inferAddressSpaces(
   }
 }
 
-Optional<unsigned> NVPTXInferAddressSpaces::updateAddressSpace(
-  const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
+Optional<unsigned> InferAddressSpaces::updateAddressSpace(
+    const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
   assert(InferredAddrSpace.count(&V));
 
   // The new inferred address space equals the join of the address spaces
@@ -514,7 +514,8 @@ Optional<unsigned> NVPTXInferAddressSpaces::updateAddressSpace(
     else
       OperandAS = PtrOperand->getType()->getPointerAddressSpace();
     NewAS = joinAddressSpaces(NewAS, OperandAS);
-    // join(flat, *) = flat. So we can break if NewAS is already generic.
+
+    // join(flat, *) = flat. So we can break if NewAS is already flat.
     if (NewAS == FlatAddrSpace)
       break;
   }
@@ -526,9 +527,9 @@ Optional<unsigned> NVPTXInferAddressSpaces::updateAddressSpace(
   return NewAS;
 }
 
-bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
-  const std::vector<Value *> &Postorder,
-  const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
+bool InferAddressSpaces::rewriteWithNewAddressSpaces(
+    const std::vector<Value *> &Postorder,
+    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
   // For each address expression to be modified, creates a clone of it with its
   // pointer operands converted to the new address space. Since the pointer
   // operands are converted, the clone is naturally in the new address space by
@@ -539,7 +540,7 @@ bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
     unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
     if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
       ValueWithNewAddrSpace[V] = cloneValueWithNewAddressSpace(
-        V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
+          V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
     }
   }
 
@@ -577,15 +578,15 @@ bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
       // so the resultant load/store is still valid.
       U->set(NewV);
     } else if (isa<Instruction>(U->getUser())) {
-      // Otherwise, replaces the use with generic(NewV).
+      // Otherwise, replaces the use with flat(NewV).
       // TODO: Some optimization opportunities are missed. For example, in
       //   %0 = icmp eq float* %p, %q
       // if both p and q are inferred to be shared, we can rewrite %0 as
       //   %0 = icmp eq float addrspace(3)* %new_p, %new_q
       // instead of currently
-      //   %generic_p = addrspacecast float addrspace(3)* %new_p to float*
-      //   %generic_q = addrspacecast float addrspace(3)* %new_q to float*
-      //   %0 = icmp eq float* %generic_p, %generic_q
+      //   %flat_p = addrspacecast float addrspace(3)* %new_p to float*
+      //   %flat_q = addrspacecast float addrspace(3)* %new_q to float*
+      //   %0 = icmp eq float* %flat_p, %flat_q
       if (Instruction *I = dyn_cast<Instruction>(V)) {
         BasicBlock::iterator InsertPos = std::next(I->getIterator());
         while (isa<PHINode>(InsertPos))
@@ -604,6 +605,6 @@ bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
   return true;
 }
 
-FunctionPass *llvm::createNVPTXInferAddressSpacesPass() {
-  return new NVPTXInferAddressSpaces();
+FunctionPass *llvm::createInferAddressSpacesPass() {
+  return new InferAddressSpaces();
 }
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index f2727b08881..002e125576b 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -50,6 +50,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeFlattenCFGPassPass(Registry);
   initializeInductiveRangeCheckEliminationPass(Registry);
   initializeIndVarSimplifyLegacyPassPass(Registry);
+  initializeInferAddressSpacesPass(Registry);
   initializeJumpThreadingPass(Registry);
   initializeLegacyLICMPassPass(Registry);
   initializeLegacyLoopSinkPassPass(Registry);
diff --git a/test/CodeGen/NVPTX/access-non-generic.ll b/test/CodeGen/NVPTX/access-non-generic.ll
index f0850c301f1..d5776d77b10 100644
--- a/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/test/CodeGen/NVPTX/access-non-generic.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix PTX
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix PTX
-; RUN: opt -mtriple=nvptx-- < %s -S -nvptx-infer-addrspace | FileCheck %s --check-prefix IR
-; RUN: opt -mtriple=nvptx64-- < %s -S -nvptx-infer-addrspace | FileCheck %s --check-prefix IR
+; RUN: opt -mtriple=nvptx-- < %s -S -infer-address-spaces | FileCheck %s --check-prefix IR
+; RUN: opt -mtriple=nvptx64-- < %s -S -infer-address-spaces | FileCheck %s --check-prefix IR
 
 @array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
 @scalar = internal addrspace(3) global float 0.000000e+00, align 4
diff --git a/test/CodeGen/NVPTX/lower-alloca.ll b/test/CodeGen/NVPTX/lower-alloca.ll
index 4177cd1fe97..3db225ef0e7 100644
--- a/test/CodeGen/NVPTX/lower-alloca.ll
+++ b/test/CodeGen/NVPTX/lower-alloca.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -S -nvptx-lower-alloca -nvptx-infer-addrspace | FileCheck %s
+; RUN: opt < %s -S -nvptx-lower-alloca -infer-address-spaces | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
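
For context, below is a minimal sketch of the kind of rewrite the renamed pass performs on NVPTX IR under the new -infer-address-spaces flag. It is illustrative only and not taken verbatim from access-non-generic.ll; the value names (%p, %v) are hypothetical, and it assumes a shared-memory global like the @scalar defined above together with the opt invocation used in the updated RUN lines:

  ; RUN: opt -mtriple=nvptx64-- < %s -S -infer-address-spaces
  ; Before: the load goes through a flat (generic) pointer produced by an addrspacecast.
  %p = addrspacecast float addrspace(3)* @scalar to float*
  %v = load float, float* %p
  ; After: the pointer operand is inferred to be in addrspace(3), so the load is
  ; rewritten to access shared memory directly and the addrspacecast becomes dead.
  %v = load float, float addrspace(3)* @scalar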