mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-16 08:29:43 +00:00
AMDGPU: Use inbounds when calculating workitem offset
When promoting allocas to LDS, we know we are indexing into a specific area that was just created, and the offset calculation will never overflow. Also emit some of the muls as nuw nsw, because instcombine already infers this from the range metadata. I think putting these flags on the other adds and muls might be OK too, but I'm not 100% sure.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259545 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
380d47d651
commit
374613d697
@ -485,17 +485,18 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
|
|||||||
Value *TIdY = getWorkitemID(Builder, 1);
|
Value *TIdY = getWorkitemID(Builder, 1);
|
||||||
Value *TIdZ = getWorkitemID(Builder, 2);
|
Value *TIdZ = getWorkitemID(Builder, 2);
|
||||||
|
|
||||||
Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
|
Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
|
||||||
Tmp0 = Builder.CreateMul(Tmp0, TIdX);
|
Tmp0 = Builder.CreateMul(Tmp0, TIdX);
|
||||||
Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
|
Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
|
||||||
Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
|
Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
|
||||||
TID = Builder.CreateAdd(TID, TIdZ);
|
TID = Builder.CreateAdd(TID, TIdZ);
|
||||||
|
|
||||||
std::vector<Value*> Indices;
|
Value *Indices[] = {
|
||||||
Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
|
Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
|
||||||
Indices.push_back(TID);
|
TID
|
||||||
|
};
|
||||||
|
|
||||||
Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
|
Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
|
||||||
I.mutateType(Offset->getType());
|
I.mutateType(Offset->getType());
|
||||||
I.replaceAllUsesWith(Offset);
|
I.replaceAllUsesWith(Offset);
|
||||||
I.eraseFromParent();
|
I.eraseFromParent();
|
||||||
|
@ -48,9 +48,22 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
|||||||
; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(2)* [[GEP1]], align 4, !range !1, !invariant.load !0
|
; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(2)* [[GEP1]], align 4, !range !1, !invariant.load !0
|
||||||
; HSAOPT: [[EXTRACTY:%[0-9]+]] = lshr i32 [[LDXY]], 16
|
; HSAOPT: [[EXTRACTY:%[0-9]+]] = lshr i32 [[LDXY]], 16
|
||||||
|
|
||||||
; HSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !1
|
; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !1
|
||||||
; HSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !1
|
; HSAOPT: [[WORKITEM_ID_Y:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.y(), !range !1
|
||||||
; HSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !1
|
; HSAOPT: [[WORKITEM_ID_Z:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.z(), !range !1
|
||||||
|
|
||||||
|
; HSAOPT: [[Y_SIZE_X_Z_SIZE:%[0-9]+]] = mul nuw nsw i32 [[EXTRACTY]], [[LDZU]]
|
||||||
|
; HSAOPT: [[YZ_X_XID:%[0-9]+]] = mul i32 [[Y_SIZE_X_Z_SIZE]], [[WORKITEM_ID_X]]
|
||||||
|
; HSAOPT: [[Y_X_Z_SIZE:%[0-9]+]] = mul nuw nsw i32 [[WORKITEM_ID_Y]], [[LDZU]]
|
||||||
|
; HSAOPT: [[ADD_YZ_X_X_YZ_SIZE:%[0-9]+]] = add i32 [[YZ_X_XID]], [[Y_X_Z_SIZE]]
|
||||||
|
; HSAOPT: [[ADD_ZID:%[0-9]+]] = add i32 [[ADD_YZ_X_X_YZ_SIZE]], [[WORKITEM_ID_Z]]
|
||||||
|
|
||||||
|
; HSAOPT: [[LOCAL_GEP:%[0-9]+]] = getelementptr inbounds [256 x [5 x i32]], [256 x [5 x i32]] addrspace(3)* @stack, i32 0, i32 [[ADD_ZID]]
|
||||||
|
; HSAOPT: %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(3)* [[LOCAL_GEP]], i32 0, i32 {{%[0-9]+}}
|
||||||
|
; HSAOPT: %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(3)* [[LOCAL_GEP]], i32 0, i32 {{%[0-9]+}}
|
||||||
|
; HSAOPT: %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(3)* [[LOCAL_GEP]], i32 0, i32 0
|
||||||
|
; HSAOPT: %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(3)* [[LOCAL_GEP]], i32 0, i32 1
|
||||||
|
|
||||||
|
|
||||||
; NOHSAOPT: call i32 @llvm.r600.read.local.size.y(), !range !0
|
; NOHSAOPT: call i32 @llvm.r600.read.local.size.y(), !range !0
|
||||||
; NOHSAOPT: call i32 @llvm.r600.read.local.size.z(), !range !0
|
; NOHSAOPT: call i32 @llvm.r600.read.local.size.z(), !range !0
|
||||||
|
Loading…
Reference in New Issue
Block a user