[DAGCombiner][AMDGPU][Mips] Fold bitcast with volatile loads if the resulting load is legal for the target.

Summary:
I'm not sure if this patch is correct or if it needs more qualification somehow. A bitcast shouldn't change the size of the load, so it should be OK? We already do something similar for stores: we'll change the type of a volatile store if the resulting store is Legal or Custom. I'm not sure we should be allowing Custom there...

I was playing around with converting X86 atomic loads/stores (except seq_cst) into regular volatile loads and stores during lowering. This would allow some special RMW isel patterns in X86InstrCompiler.td to be removed. But there are some floating-point patterns in there that didn't work, because we don't fold (f64 (bitconvert (i64 volatile load))) or (f32 (bitconvert (i32 volatile load))).
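
To make the pattern concrete, here is a minimal IR sketch (hypothetical function name, typed-pointer syntax of the era) of the fold this patch enables; previously the volatile qualifier blocked it unconditionally:

define float @bitcast_volatile_load(i32* %p) {
  ; The combiner may now rewrite this pair into a single volatile f32 load,
  ; provided the target reports ISD::LOAD as legal for f32.
  %i = load volatile i32, i32* %p
  %f = bitcast i32 %i to float
  ret float %f
}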

Reviewers: efriedma, atanasyan, arsenm

Reviewed By: efriedma

Subscribers: jvesely, arsenm, sdardis, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, arichardson, jrtc27, atanasyan, jfb, llvm-commits

Differential Revision: https://reviews.llvm.org/D50491

llvm-svn: 340797
Craig Topper, 2018-08-28 03:47:20 +00:00
commit 707737eef4 (parent a72b09e6fc)
5 changed files with 31 additions and 38 deletions


@@ -9833,12 +9833,16 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
   // fold (conv (load x)) -> (load (conv*)x)
   // If the resultant load doesn't need a higher alignment than the original!
   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
-      // Do not change the width of a volatile load.
-      !cast<LoadSDNode>(N0)->isVolatile() &&
       // Do not remove the cast if the types differ in endian layout.
       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
-      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+      // If the load is volatile, we only want to change the load type if the
+      // resulting load is legal. Otherwise we might increase the number of
+      // memory accesses. We don't care if the original type was legal or not
+      // as we assume software couldn't rely on the number of accesses of an
+      // illegal type.
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isOperationLegal(ISD::LOAD, VT)) &&
       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
     unsigned OrigAlign = LN0->getAlignment();
@@ -14694,6 +14698,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
       ST->isUnindexed()) {
     EVT SVT = Value.getOperand(0).getValueType();
+    // If the store is volatile, we only want to change the store type if the
+    // resulting store is legal. Otherwise we might increase the number of
+    // memory accesses. We don't care if the original type was legal or not
+    // as we assume software couldn't rely on the number of accesses of an
+    // illegal type.
     if (((!LegalOperations && !ST->isVolatile()) ||
          TLI.isOperationLegal(ISD::STORE, SVT)) &&
         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
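
For contrast, a hypothetical case where the new guard must keep the fold off: if the destination type is not legal, rewriting the volatile load could let type legalization split it into several narrower accesses.

define <4 x i8> @no_fold_illegal_type(i32* %p) {
  ; Assuming <4 x i8> is not a legal type for the target: folding the bitcast
  ; into the volatile load could later be legalized into multiple narrower
  ; loads, changing the access count that volatile is supposed to pin down.
  %i = load volatile i32, i32* %p
  %v = bitcast i32 %i to <4 x i8>
  ret <4 x i8> %v
}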


@@ -147,10 +147,7 @@ define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3
 }
 
 ; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
+; GCN: {{buffer|flat}}_load_dword
 ; GCN: buffer_store_dword
 ; GCN: s_endpgm
 define amdgpu_kernel void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
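
The updated checks reflect the new behavior: since an i32-typed load is legal on GCN, the volatile <4 x i8> load is now selected as a single dword access instead of four byte loads. Roughly, the shape of the code being compiled (a sketch, not the verbatim test body):

  ; The DAG-level bitcast of the loaded value to i32 now folds into the
  ; volatile load, so isel sees one legal i32 load.
  %val = load volatile <4 x i8>, <4 x i8> addrspace(1)* %in
  store <4 x i8> %val, <4 x i8> addrspace(1)* %out
  ret void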


@@ -18,14 +18,10 @@ entry:
 }
 
 ; ALL-LABEL: retldouble:
-; N32-DAG: ld [[R2:\$[0-9]+]], %lo(fp128)([[R1:\$[0-9]+]])
+; N32-DAG: ldc1 $f0, %lo(fp128)([[R1:\$[0-9]+]])
 ; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(fp128)
-; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
-; N32-DAG: dmtc1 [[R2]], $f0
-; N32-DAG: dmtc1 [[R4]], $f2
+; N32-DAG: ldc1 $f2, 8([[R3]])
 ; N64-DAG: lui [[R2:\$[0-9]+]], %highest(fp128)
-; N64-DAG: ld [[R3:\$[0-9]+]], %lo(fp128)([[R2]])
-; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R2]])
-; N64-DAG: dmtc1 [[R3]], $f0
-; N64-DAG: dmtc1 [[R4]], $f2
+; N64-DAG: ldc1 $f0, %lo(fp128)([[R2]])
+; N64-DAG: ldc1 $f2, 8([[R2]])


@@ -23,14 +23,10 @@ entry:
 ; is returned in $f0, and $f1 instead of the usual $f0, and $f2. This is to
 ; match the de facto ABI as implemented by GCC.
 ; N32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_fp128)
-; N32-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
-; N32-DAG: dmtc1 [[R2]], $f0
+; N32-DAG: ldc1 $f0, %lo(struct_fp128)([[R1]])
 ; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(struct_fp128)
-; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
-; N32-DAG: dmtc1 [[R4]], $f1
+; N32-DAG: ldc1 $f1, 8([[R3]])
 ; N64-DAG: lui [[R1:\$[0-9]+]], %highest(struct_fp128)
-; N64-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
-; N64-DAG: dmtc1 [[R2]], $f0
-; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R1]])
-; N64-DAG: dmtc1 [[R4]], $f1
+; N64-DAG: ldc1 $f0, %lo(struct_fp128)([[R1]])
+; N64-DAG: ldc1 $f1, 8([[R1]])
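
In both fp128 hunks the fold removes the GPR-to-FPR traffic: each 64-bit half of the fp128 value now loads directly into its destination FPU register with ldc1 instead of an integer ld followed by a dmtc1 move. A hypothetical reduction of the code being compiled (not the verbatim tests):

@fp128 = external global fp128

define fp128 @retldouble() {
  ; Legalization splits the fp128 load into two 64-bit halves destined for
  ; the $f0/$f2 return registers; the (f64 (bitcast (i64 volatile load)))
  ; nodes that appear can now fold into direct f64 (ldc1) loads.
  %val = load volatile fp128, fp128* @fp128
  ret fp128 %val
}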


@@ -362,14 +362,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v16i8:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.b [[R3]],
 ; LITENDIAN: .size v8f16_to_v16i8
 
 ; BIGENDIAN: v8f16_to_v16i8:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.b [[R4]],
 ; BIGENDIAN: .size v8f16_to_v16i8
@@ -431,14 +430,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v4i32:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.w [[R2]],
 ; LITENDIAN: .size v8f16_to_v4i32
 
 ; BIGENDIAN: v8f16_to_v4i32:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.w [[R3]],
 ; BIGENDIAN: .size v8f16_to_v4i32
@@ -455,14 +453,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v4f32:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.w [[R2]],
 ; LITENDIAN: .size v8f16_to_v4f32
 
 ; BIGENDIAN: v8f16_to_v4f32:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.w [[R3]],
 ; BIGENDIAN: .size v8f16_to_v4f32
@@ -479,14 +476,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v2i64:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.d [[R2]],
 ; LITENDIAN: .size v8f16_to_v2i64
 
 ; BIGENDIAN: v8f16_to_v2i64:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.d [[R3]],
 ; BIGENDIAN: .size v8f16_to_v2i64
@@ -503,14 +499,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v2f64:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.d [[R2]],
 ; LITENDIAN: .size v8f16_to_v2f64
 
 ; BIGENDIAN: v8f16_to_v2f64:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.d [[R3]],
 ; BIGENDIAN: .size v8f16_to_v2f64
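
All five MSA hunks follow the same pattern: once the bitcast folds into the volatile <8 x half> load, the vector is loaded directly at the destination element width (ld.b/ld.w/ld.d instead of ld.h), so the big-endian element-reordering shuffles (shf.b/shf.h) disappear as well. A hypothetical reduction of one such test:

define void @sketch_v8f16_to_v4i32(<8 x half>* %src, <4 x i32>* %dst) {
entry:
  ; The volatile v8f16 load plus bitcast becomes a single word-element load,
  ; which on big-endian targets already has the desired element order.
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <4 x i32>
  %2 = add <4 x i32> %1, %1
  store <4 x i32> %2, <4 x i32>* %dst
  ret void
}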