[CodeGenPrepare] Don't sink non-cheap addrspacecasts.

Summary:
Previously, CGP would unconditionally sink addrspacecast instructions,
even going so far as to sink them into a loop.

Now we check that the cast is "cheap", as defined by TLI.

We introduce a new "is-cheap" function to TLI rather than using
isNoopAddrSpaceCast because some GPU platforms want the ability to ask
for non-nop casts to be sunk.

Reviewers: arsenm, tra

Subscribers: jholewinski, wdng, llvm-commits

Differential Revision: https://reviews.llvm.org/D26923

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287591 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Justin Lebar 2016-11-21 22:49:15 +00:00
parent 5db0e4c349
commit 09220c80d3
3 changed files with 35 additions and 0 deletions

View File

@ -1153,6 +1153,12 @@ public:
return false;
}
/// Reports whether an address-space cast from \p SrcAS to \p DestAS is cheap
/// enough that callers are happy to sink it into the basic blocks using it.
///
/// The default answer is whatever isNoopAddrSpaceCast() returns for the same
/// pair of address spaces. The hook is virtual, so a target can override it
/// to give a different answer.
virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
  const bool SameAsNoopQuery = isNoopAddrSpaceCast(SrcAS, DestAS);
  return SameAsNoopQuery;
}
/// Return true if the pointer arguments to CI should be aligned by aligning
/// the object whose address is being passed. If so then MinSize is set to the
/// minimum size the object must be to be aligned and PrefAlign is set to the

View File

@ -927,6 +927,14 @@ static bool SinkCast(CastInst *CI) {
///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
const DataLayout &DL) {
// Sink only "cheap" (or nop) address-space casts. This is a weaker condition
// than sinking only nop casts, but is helpful on some platforms.
if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
ASC->getDestAddressSpace()))
return false;
}
// If this is a noop copy,
EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(DL, CI->getType());

View File

@ -0,0 +1,21 @@
; Regression test: after running CodeGenPrepare, the addrspacecast below is
; expected to still be in the entry block of @test and to appear in neither of
; the two successor blocks.
; RUN: opt -S -codegenprepare < %s | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
; CHECK-LABEL: @test
define i64 @test(i1 %pred, i64* %ptr) {
; The cast is defined once, ahead of the conditional branch; the output must
; still contain it before the first successor label.
; CHECK: addrspacecast
%ptr_as1 = addrspacecast i64* %ptr to i64 addrspace(1)*
br i1 %pred, label %l1, label %l2
l1:
; This path loads through the original pointer and never uses the cast result.
; CHECK-LABEL: l1:
; CHECK-NOT: addrspacecast
%v1 = load i64, i64* %ptr
ret i64 %v1
l2:
; This path is the only user of the cast result; the cast must nevertheless
; not show up inside this block.
; CHECK-LABEL: l2:
; CHECK-NOT: addrspacecast
%v2 = load i64, i64 addrspace(1)* %ptr_as1
ret i64 %v2
}