DAGCombiner: do not narrow an extract_vector_elt of a volatile vector load.

visitEXTRACT_VECTOR_ELT now calls ReplaceExtractVectorEltOfLoadWithNarrowedLoad
only when the source load is not volatile, and the helper asserts that
precondition on entry. The added AMDGPU tests check that a volatile <4 x i32>
load followed by an extractelement (constant index 0, constant index 2, and a
dynamic index) is still selected as buffer_load_dwordx4.

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 66a4b427a2d..bdc0f62f967 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12263,6 +12263,8 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
 
 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+  assert(!OriginalLoad->isVolatile());
+
   EVT ResultVT = EVE->getValueType(0);
   EVT VecEltVT = InVecVT.getVectorElementType();
   unsigned Align = OriginalLoad->getAlignment();
@@ -12457,9 +12459,12 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
       ISD::isNormalLoad(InVec.getNode()) &&
       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
     SDValue Index = N->getOperand(1);
-    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
-      return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
-                                                           OrigLoad);
+    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
+      if (!OrigLoad->isVolatile()) {
+        return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
+                                                             OrigLoad);
+      }
+    }
   }
 
   // Perform only after legalization to ensure build_vector / vector_shuffle
diff --git a/test/CodeGen/AMDGPU/extractelt-to-trunc.ll b/test/CodeGen/AMDGPU/extractelt-to-trunc.ll
index 3c5b59c321a..e160c20a03a 100644
--- a/test/CodeGen/AMDGPU/extractelt-to-trunc.ll
+++ b/test/CodeGen/AMDGPU/extractelt-to-trunc.ll
@@ -41,3 +41,37 @@ define void @bitcast_int_to_fpvector_extract_0(float addrspace(1)* %out, i64 add
   store float %extract, float addrspace(1)* %out
   ret void
 }
+
+; GCN-LABEL: {{^}}no_extract_volatile_load_extract0:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_store_dword v
+define void @no_extract_volatile_load_extract0(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+entry:
+  %vec = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
+  %elt0 = extractelement <4 x i32> %vec, i32 0
+  store i32 %elt0, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}no_extract_volatile_load_extract2:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_store_dword v
+
+define void @no_extract_volatile_load_extract2(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+entry:
+  %vec = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
+  %elt2 = extractelement <4 x i32> %vec, i32 2
+  store i32 %elt2, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}no_extract_volatile_load_dynextract:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_store_dword v
+define void @no_extract_volatile_load_dynextract(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) {
+entry:
+  %vec = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
+  %eltN = extractelement <4 x i32> %vec, i32 %idx
+  store i32 %eltN, i32 addrspace(1)* %out
+  ret void
+}