[DAGCombiner][AMDGPU][Mips] Fold bitcast with volatile loads if the resulting load is legal for the target.

Summary:
I'm not sure if this patch is correct or if it needs more qualification somehow. A bitcast shouldn't change the size of the load, so it should be OK? We already do something similar for stores: we'll change the type of a volatile store if the resulting store is Legal or Custom. I'm not sure we should be allowing Custom there...

I was playing around with converting X86 atomic loads/stores (except seq_cst) into regular volatile loads and stores during lowering. This would allow some special RMW isel patterns in X86InstrCompiler.td to be removed. But there are some floating-point patterns in there that didn't work, because we don't fold (f64 (bitconvert (i64 volatile load))) or (f32 (bitconvert (i32 volatile load))).
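
To make the pattern concrete, here is a minimal IR sketch (hypothetical function name, typed-pointer syntax of the era) of the fold this patch enables; previously the volatile qualifier blocked it unconditionally:

define float @bitcast_volatile_load(i32* %p) {
  ; The combiner may now rewrite this pair into a single volatile f32 load,
  ; provided the target reports ISD::LOAD as legal for f32.
  %i = load volatile i32, i32* %p
  %f = bitcast i32 %i to float
  ret float %f
}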

Reviewers: efriedma, atanasyan, arsenm

Reviewed By: efriedma

Subscribers: jvesely, arsenm, sdardis, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, arichardson, jrtc27, atanasyan, jfb, llvm-commits

Differential Revision: https://reviews.llvm.org/D50491

llvm-svn: 340797
Craig Topper, 2018-08-28 03:47:20 +00:00
commit 707737eef4 (parent a72b09e6fc)
5 changed files with 31 additions and 38 deletions


@@ -9833,12 +9833,16 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
   // fold (conv (load x)) -> (load (conv*)x)
   // If the resultant load doesn't need a higher alignment than the original!
   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
-      // Do not change the width of a volatile load.
-      !cast<LoadSDNode>(N0)->isVolatile() &&
       // Do not remove the cast if the types differ in endian layout.
       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
-      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+      // If the load is volatile, we only want to change the load type if the
+      // resulting load is legal. Otherwise we might increase the number of
+      // memory accesses. We don't care if the original type was legal or not
+      // as we assume software couldn't rely on the number of accesses of an
+      // illegal type.
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isOperationLegal(ISD::LOAD, VT)) &&
       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
     unsigned OrigAlign = LN0->getAlignment();
@@ -14694,6 +14698,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
       ST->isUnindexed()) {
     EVT SVT = Value.getOperand(0).getValueType();
+    // If the store is volatile, we only want to change the store type if the
+    // resulting store is legal. Otherwise we might increase the number of
+    // memory accesses. We don't care if the original type was legal or not
+    // as we assume software couldn't rely on the number of accesses of an
+    // illegal type.
     if (((!LegalOperations && !ST->isVolatile()) ||
          TLI.isOperationLegal(ISD::STORE, SVT)) &&
         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
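
For contrast, a hypothetical case where the new guard must keep the fold off: if the destination type is not legal, rewriting the volatile load could let type legalization split it into several narrower accesses.

define <4 x i8> @no_fold_illegal_type(i32* %p) {
  ; Assuming <4 x i8> is not a legal type for the target: folding the bitcast
  ; into the volatile load could later be legalized into multiple narrower
  ; loads, changing the access count that volatile is supposed to pin down.
  %i = load volatile i32, i32* %p
  %v = bitcast i32 %i to <4 x i8>
  ret <4 x i8> %v
}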


@@ -147,10 +147,7 @@ define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3
 }
 
 ; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
+; GCN: {{buffer|flat}}_load_dword
 ; GCN: buffer_store_dword
 ; GCN: s_endpgm
 define amdgpu_kernel void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
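
The updated checks reflect the new behavior: since an i32-typed load is legal on GCN, the volatile <4 x i8> load is now selected as a single dword access instead of four byte loads. Roughly, the shape of the code being compiled (a sketch, not the verbatim test body):

  ; The DAG-level bitcast of the loaded value to i32 now folds into the
  ; volatile load, so isel sees one legal i32 load.
  %val = load volatile <4 x i8>, <4 x i8> addrspace(1)* %in
  store <4 x i8> %val, <4 x i8> addrspace(1)* %out
  ret void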


@@ -18,14 +18,10 @@ entry:
 }
 
 ; ALL-LABEL: retldouble:
-; N32-DAG: ld [[R2:\$[0-9]+]], %lo(fp128)([[R1:\$[0-9]+]])
+; N32-DAG: ldc1 $f0, %lo(fp128)([[R1:\$[0-9]+]])
 ; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(fp128)
-; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
-; N32-DAG: dmtc1 [[R2]], $f0
-; N32-DAG: dmtc1 [[R4]], $f2
+; N32-DAG: ldc1 $f2, 8([[R3]])
 ; N64-DAG: lui [[R2:\$[0-9]+]], %highest(fp128)
-; N64-DAG: ld [[R3:\$[0-9]+]], %lo(fp128)([[R2]])
-; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R2]])
-; N64-DAG: dmtc1 [[R3]], $f0
-; N64-DAG: dmtc1 [[R4]], $f2
+; N64-DAG: ldc1 $f0, %lo(fp128)([[R2]])
+; N64-DAG: ldc1 $f2, 8([[R2]])


@@ -23,14 +23,10 @@ entry:
 ; is returned in $f0, and $f1 instead of the usual $f0, and $f2. This is to
 ; match the de facto ABI as implemented by GCC.
 ; N32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_fp128)
-; N32-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
-; N32-DAG: dmtc1 [[R2]], $f0
+; N32-DAG: ldc1 $f0, %lo(struct_fp128)([[R1]])
 ; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(struct_fp128)
-; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
-; N32-DAG: dmtc1 [[R4]], $f1
+; N32-DAG: ldc1 $f1, 8([[R3]])
 ; N64-DAG: lui [[R1:\$[0-9]+]], %highest(struct_fp128)
-; N64-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
-; N64-DAG: dmtc1 [[R2]], $f0
-; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R1]])
-; N64-DAG: dmtc1 [[R4]], $f1
+; N64-DAG: ldc1 $f0, %lo(struct_fp128)([[R1]])
+; N64-DAG: ldc1 $f1, 8([[R1]])
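
In both fp128 hunks the fold removes the GPR-to-FPR traffic: each 64-bit half of the fp128 value now loads directly into its destination FPU register with ldc1 instead of an integer ld followed by a dmtc1 move. A hypothetical reduction of the code being compiled (not the verbatim tests):

@fp128 = external global fp128

define fp128 @retldouble() {
  ; Legalization splits the fp128 load into two 64-bit halves destined for
  ; the $f0/$f2 return registers; the (f64 (bitcast (i64 volatile load)))
  ; nodes that appear can now fold into direct f64 (ldc1) loads.
  %val = load volatile fp128, fp128* @fp128
  ret fp128 %val
}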


@@ -362,14 +362,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v16i8:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.b [[R3]],
 ; LITENDIAN: .size v8f16_to_v16i8
 
 ; BIGENDIAN: v8f16_to_v16i8:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.b [[R4]],
 ; BIGENDIAN: .size v8f16_to_v16i8
@@ -431,14 +430,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v4i32:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.w [[R2]],
 ; LITENDIAN: .size v8f16_to_v4i32
 
 ; BIGENDIAN: v8f16_to_v4i32:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.w [[R3]],
 ; BIGENDIAN: .size v8f16_to_v4i32
@@ -455,14 +453,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v4f32:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.w [[R2]],
 ; LITENDIAN: .size v8f16_to_v4f32
 
 ; BIGENDIAN: v8f16_to_v4f32:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.w [[R3]],
 ; BIGENDIAN: .size v8f16_to_v4f32
@@ -479,14 +476,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v2i64:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.d [[R2]],
 ; LITENDIAN: .size v8f16_to_v2i64
 
 ; BIGENDIAN: v8f16_to_v2i64:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.d [[R3]],
 ; BIGENDIAN: .size v8f16_to_v2i64
@@ -503,14 +499,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v2f64:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.d [[R2]],
 ; LITENDIAN: .size v8f16_to_v2f64
 
 ; BIGENDIAN: v8f16_to_v2f64:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.d [[R3]],
 ; BIGENDIAN: .size v8f16_to_v2f64
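
All five MSA hunks follow the same pattern: once the bitcast folds into the volatile <8 x half> load, the vector is loaded directly at the destination element width (ld.b/ld.w/ld.d instead of ld.h), so the big-endian element-reordering shuffles (shf.b/shf.h) disappear as well. A hypothetical reduction of one such test:

define void @sketch_v8f16_to_v4i32(<8 x half>* %src, <4 x i32>* %dst) {
entry:
  ; The volatile v8f16 load plus bitcast becomes a single word-element load,
  ; which on big-endian targets already has the desired element order.
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <4 x i32>
  %2 = add <4 x i32> %1, %1
  store <4 x i32> %2, <4 x i32>* %dst
  ret void
}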