diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 008e1311b73..e7cc2067373 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -537,16 +537,43 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, TD->getPrefTypeAlignment(CFP->getType())); } - if (Init->getType()->isAggregateType()) { + Type *InitTy = Init->getType(); + if (StructType *ST = dyn_cast(InitTy)) { + const StructLayout *SL = TD->getStructLayout(ST); + EVT PtrVT = InitPtr.getValueType(); - unsigned NumElements = Init->getType()->getArrayNumElements(); + SmallVector Chains; + + for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) { + SDValue Offset = DAG.getConstant(SL->getElementOffset(I), PtrVT); + SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset); + + Constant *Elt = Init->getAggregateElement(I); + Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG)); + } + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + } + + if (SequentialType *SeqTy = dyn_cast(InitTy)) { + EVT PtrVT = InitPtr.getValueType(); + + unsigned NumElements; + if (ArrayType *AT = dyn_cast(SeqTy)) + NumElements = AT->getNumElements(); + else if (VectorType *VT = dyn_cast(SeqTy)) + NumElements = VT->getNumElements(); + else + llvm_unreachable("Unexpected type"); + + unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType()); SmallVector Chains; for (unsigned i = 0; i < NumElements; ++i) { - SDValue Offset = DAG.getConstant(i * TD->getTypeAllocSize( - Init->getType()->getArrayElementType()), PtrVT); + SDValue Offset = DAG.getConstant(i * EltSize, PtrVT); SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset); - Chains.push_back(LowerConstantInitializer(Init->getAggregateElement(i), - GV, Ptr, Chain, DAG)); + + Constant *Elt = Init->getAggregateElement(i); + Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG)); } return 
DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); diff --git a/test/CodeGen/R600/gv-const-addrspace-fail.ll b/test/CodeGen/R600/gv-const-addrspace-fail.ll index f217ab5df7f..ebd78110762 100644 --- a/test/CodeGen/R600/gv-const-addrspace-fail.ll +++ b/test/CodeGen/R600/gv-const-addrspace-fail.ll @@ -28,3 +28,31 @@ define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 { store i16 %1, i16 addrspace(1)* %out ret void } + +%struct.bar = type { float, [5 x i8] } + +; The illegal i8s aren't handled +@struct_bar_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.bar] [ %struct.bar { float 16.0, [5 x i8] [i8 0, i8 1, i8 2, i8 3, i8 4] } ] + +; FUNC-LABEL: @struct_bar_gv_load +define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index + %load = load i8 addrspace(2)* %gep, align 1 + store i8 %load, i8 addrspace(1)* %out, align 1 + ret void +} + + +; The private load isn't scalarized. 
+@array_vector_gv = internal addrspace(2) constant [4 x <4 x i32>] [ <4 x i32> , + <4 x i32> , + <4 x i32> , + <4 x i32> ] + +; FUNC-LABEL: @array_vector_gv_load +define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index + %load = load <4 x i32> addrspace(2)* %gep, align 16 + store <4 x i32> %load, <4 x i32> addrspace(1)* %out, align 16 + ret void +} diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll index a3504df77b3..01760617d3e 100644 --- a/test/CodeGen/R600/gv-const-addrspace.ll +++ b/test/CodeGen/R600/gv-const-addrspace.ll @@ -43,3 +43,30 @@ entry: store i32 %1, i32 addrspace(1)* %out ret void } + + +%struct.foo = type { float, [5 x i32] } + +@struct_foo_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ] + +; FUNC-LABEL: @struct_foo_gv_load + +define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index + %load = load i32 addrspace(2)* %gep, align 4 + store i32 %load, i32 addrspace(1)* %out, align 4 + ret void +} + +@array_v1_gv = internal addrspace(2) constant [4 x <1 x i32>] [ <1 x i32> , + <1 x i32> , + <1 x i32> , + <1 x i32> ] + +; FUNC-LABEL: @array_v1_gv_load +define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index + %load = load <1 x i32> addrspace(2)* %gep, align 4 + store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4 + ret void +}