From 87d1190761899f3e5e4aa93b76c72be69108f3b5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 22 Apr 2016 21:01:41 +0000
Subject: [PATCH] DAGCombiner: Relax alignment restriction when changing store
 type

If the target allows the alignment, this should be OK.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@267217 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Target/TargetLowering.h        |  9 ++++
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp    | 24 +++++----
 .../AMDGPU/reduce-store-width-alignment.ll  | 53 +++++++++++++++++++
 test/CodeGen/X86/avx-vextractf128.ll        |  2 +-
 4 files changed, 77 insertions(+), 11 deletions(-)
 create mode 100644 test/CodeGen/AMDGPU/reduce-store-width-alignment.ll

diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 9b5ff57fea8..678817b39d1 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -286,6 +286,15 @@ public:
     return true;
   }
 
+  /// isStoreBitCastBeneficial() - Mirror of isLoadBitCastBeneficial(). Return
+  /// true if the following transform is beneficial.
+  ///
+  /// (store (y (conv x)), y*) -> (store x, (x*))
+  virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const {
+    // Default to the same logic as loads.
+    return isLoadBitCastBeneficial(StoreVT, BitcastVT);
+  }
+
   /// Return true if it is expected to be cheaper to do a store of a non-zero
   /// vector constant with the given size and type for the address space than to
   /// store the individual scalar element constants.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 690a53e785c..75faf526fab 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11970,17 +11970,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   // resultant store does not need a higher alignment than the original.
   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
       ST->isUnindexed()) {
-    unsigned OrigAlign = ST->getAlignment();
     EVT SVT = Value.getOperand(0).getValueType();
-    unsigned Align = DAG.getDataLayout().getABITypeAlignment(
-        SVT.getTypeForEVT(*DAG.getContext()));
-    if (Align <= OrigAlign &&
-        ((!LegalOperations && !ST->isVolatile()) ||
-         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
-      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
-                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
-                          ST->isNonTemporal(), OrigAlign,
-                          ST->getAAInfo());
+    if (((!LegalOperations && !ST->isVolatile()) ||
+         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
+        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
+      unsigned OrigAlign = ST->getAlignment();
+      bool Fast = false;
+      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
+                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
+          Fast) {
+        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
+                            Ptr, ST->getPointerInfo(), ST->isVolatile(),
+                            ST->isNonTemporal(), OrigAlign,
+                            ST->getAAInfo());
+      }
+    }
   }
 
   // Turn 'store undef, Ptr' -> nothing.
diff --git a/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll b/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
new file mode 100644
index 00000000000..281e49f804c
--- /dev/null
+++ b/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
@@ -0,0 +1,53 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}store_v2i32_as_v4i16_align_4:
+; GCN: s_load_dwordx2
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v2i32_as_v4i16_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
+  %x.bc = bitcast <2 x i32> %x to <4 x i16>
+  store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_v4i32_as_v8i16_align_4:
+; GCN: s_load_dwordx4
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v4i32_as_v8i16_align_4(<8 x i16> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
+  %x.bc = bitcast <4 x i32> %x to <8 x i16>
+  store <8 x i16> %x.bc, <8 x i16> addrspace(3)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_v2i32_as_i64_align_4:
+; GCN: s_load_dwordx2
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v2i32_as_i64_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
+  %x.bc = bitcast <2 x i32> %x to <4 x i16>
+  store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_v4i32_as_v2i64_align_4:
+; GCN: s_load_dwordx4
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
+  %x.bc = bitcast <4 x i32> %x to <2 x i64>
+  store <2 x i64> %x.bc, <2 x i64> addrspace(3)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_v4i16_as_v2i32_align_4:
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) #0 {
+  %x.bc = bitcast <4 x i16> %x to <2 x i32>
+  store <2 x i32> %x.bc, <2 x i32> addrspace(3)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll
index d7a6d61ba0a..2feddddaf78 100644
--- a/test/CodeGen/X86/avx-vextractf128.ll
+++ b/test/CodeGen/X86/avx-vextractf128.ll
@@ -119,7 +119,7 @@ entry:
 define void @t9(i64* %p) {
 ; CHECK-LABEL: t9:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
 ; CHECK-NEXT: vmovups %ymm0, (%rdi)
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
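Not part of the patch: a rough sketch of how a backend might override the new
isStoreBitCastBeneficial() hook to veto this combine for type pairs it handles
poorly. The class name MyTargetLowering and the sub-dword threshold below are
invented for illustration; by default the hook simply defers to
isLoadBitCastBeneficial(), as in the TargetLowering.h hunk above.

// Assumes a hypothetical MyTargetLowering class deriving from TargetLowering.
bool MyTargetLowering::isStoreBitCastBeneficial(EVT StoreVT,
                                                EVT BitcastVT) const {
  // BitcastVT is the type the store would be rewritten to (see the call in
  // DAGCombiner::visitSTORE). Example policy: keep the original store type
  // when the rewrite would produce a vector of sub-dword elements.
  if (BitcastVT.isVector() && BitcastVT.getScalarType().getSizeInBits() < 32)
    return false;
  // Otherwise accept the default behavior, which mirrors the load hook.
  return TargetLowering::isStoreBitCastBeneficial(StoreVT, BitcastVT);
}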