[NFC] Encapsulate MemOp logic
Summary:
This patch simply introduces functions instead of directly accessing the
fields. This makes it easier to introduce additional check logic. A second
patch will add simplifying functions.

Reviewers: courbet

Subscribers: arsenm, nemanjai, jvesely, nhaehnle, hiraditya, kbarton, jsji, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73945
parent 13b3bdf322
commit 293e799dfc
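For illustration only (not part of the commit): a minimal sketch of what a call site looks like once the fields are private, using the MemOp::Copy factory and the accessors introduced in the diff below. The helper name pickMemOpType and its size/alignment thresholds are invented for the example.

    // Build a MemOp through a factory function and query it only through the
    // new accessors; direct field access such as Op.Size no longer compiles.
    #include "llvm/CodeGen/TargetLowering.h"
    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    static MVT pickMemOpType(uint64_t Size) {
      MemOp Op = MemOp::Copy(Size, /*DstAlignCanChange=*/false,
                             /*DstAlign=*/Align(8), /*SrcAlign=*/Align(8),
                             /*IsVolatile=*/false);
      // Before this patch the same checks read Op.Size and Op.DstAlign directly.
      if (Op.size() >= 8 && Op.getDstAlign() >= 8 &&
          (Op.isMemset() || Op.getSrcAlign() >= 8))
        return MVT::i64;
      return MVT::i32;
    }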
@@ -108,10 +108,13 @@ namespace Sched {
 // MemOp models a memory operation, either memset or memcpy/memmove.
 struct MemOp {
+private:
   // Shared
   uint64_t Size;
-  uint64_t DstAlign; // Specified alignment of the memory operation or zero if
-                     // destination alignment can satisfy any constraint.
+  bool DstAlignCanChange; // true if destination alignment can satisfy any
+                          // constraint.
+  Align DstAlign;         // Specified alignment of the memory operation.
+
   bool AllowOverlap;
   // memset only
   bool IsMemset;   // If setthis memory operation is a memset.
@@ -119,34 +122,47 @@ struct MemOp {
   // memcpy only
   bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
                      // constant so it does not need to be loaded.
-  uint64_t SrcAlign; // Inferred alignment of the source or zero if the memory
-                     // operation does not need to load the value.
+  Align SrcAlign;    // Inferred alignment of the source or default value if the
+                     // memory operation does not need to load the value.
+public:
   static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
                     Align SrcAlign, bool IsVolatile,
                     bool MemcpyStrSrc = false) {
-    return {
-        /*.Size =*/Size,
-        /*.DstAlign =*/DstAlignCanChange ? 0 : DstAlign.value(),
-        /*.AllowOverlap =*/!IsVolatile,
-        /*.IsMemset =*/false,
-        /*.ZeroMemset =*/false,
-        /*.MemcpyStrSrc =*/MemcpyStrSrc,
-        /*.SrcAlign =*/SrcAlign.value(),
-    };
+    MemOp Op;
+    Op.Size = Size;
+    Op.DstAlignCanChange = DstAlignCanChange;
+    Op.DstAlign = DstAlign;
+    Op.AllowOverlap = !IsVolatile;
+    Op.IsMemset = false;
+    Op.ZeroMemset = false;
+    Op.MemcpyStrSrc = MemcpyStrSrc;
+    Op.SrcAlign = SrcAlign;
+    return Op;
   }

   static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
                    bool IsZeroMemset, bool IsVolatile) {
-    return {
-        /*.Size =*/Size,
-        /*.DstAlign =*/DstAlignCanChange ? 0 : DstAlign.value(),
-        /*.AllowOverlap =*/!IsVolatile,
-        /*.IsMemset =*/true,
-        /*.ZeroMemset =*/IsZeroMemset,
-        /*.MemcpyStrSrc =*/false,
-        /*.SrcAlign =*/0,
-    };
+    MemOp Op;
+    Op.Size = Size;
+    Op.DstAlignCanChange = DstAlignCanChange;
+    Op.DstAlign = DstAlign;
+    Op.AllowOverlap = !IsVolatile;
+    Op.IsMemset = true;
+    Op.ZeroMemset = IsZeroMemset;
+    Op.MemcpyStrSrc = false;
+    return Op;
   }
+
+  uint64_t size() const { return Size; }
+  uint64_t getDstAlign() const {
+    return DstAlignCanChange ? 0 : DstAlign.value();
+  }
+  bool allowOverlap() const { return AllowOverlap; }
+  bool isMemset() const { return IsMemset; }
+  bool isMemcpy() const { return !IsMemset; }
+  bool isZeroMemset() const { return ZeroMemset; }
+  bool isMemcpyStrSrc() const { return MemcpyStrSrc; }
+  uint64_t getSrcAlign() const { return isMemset() ? 0 : SrcAlign.value(); }
 };

 /// This base class for TargetLowering contains the SelectionDAG-independent
@@ -860,7 +860,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
                                           unsigned DstAS, unsigned SrcAS,
                                           const AttributeList &FuncAttributes,
                                           const TargetLowering &TLI) {
-  if (Op.SrcAlign != 0 && Op.SrcAlign < Op.DstAlign)
+  if (Op.getSrcAlign() != 0 && Op.getSrcAlign() < Op.getDstAlign())
     return false;

   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
@@ -870,15 +870,15 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
     // We only need to check DstAlign here as SrcAlign is always greater or
     // equal to DstAlign (or zero).
     Ty = LLT::scalar(64);
-    while (Op.DstAlign && Op.DstAlign < Ty.getSizeInBytes() &&
-           !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.DstAlign))
+    while (Op.getDstAlign() && Op.getDstAlign() < Ty.getSizeInBytes() &&
+           !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
       Ty = LLT::scalar(Ty.getSizeInBytes());
     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
     // FIXME: check for the largest legal type we can load/store to.
   }

   unsigned NumMemOps = 0;
-  auto Size = Op.Size;
+  auto Size = Op.size();
   while (Size != 0) {
     unsigned TySize = Ty.getSizeInBytes();
     while (TySize > Size) {
@@ -897,9 +897,9 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
       bool Fast;
       // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
       MVT VT = getMVTForLLT(Ty);
-      if (NumMemOps && Op.AllowOverlap && NewTySize < Size &&
+      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
           TLI.allowsMisalignedMemoryAccesses(
-              VT, DstAS, Op.DstAlign, MachineMemOperand::MONone, &Fast) &&
+              VT, DstAS, Op.getDstAlign(), MachineMemOperand::MONone, &Fast) &&
           Fast)
         TySize = Size;
       else {
@@ -186,7 +186,7 @@ bool TargetLowering::findOptimalMemOpLowering(
   // means it's possible to change the alignment of the destination.
   // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
   // not need to be loaded.
-  if (!(Op.SrcAlign == 0 || Op.SrcAlign >= Op.DstAlign))
+  if (!(Op.getSrcAlign() == 0 || Op.getSrcAlign() >= Op.getDstAlign()))
     return false;

   EVT VT = getOptimalMemOpType(Op, FuncAttributes);
@@ -196,8 +196,8 @@ bool TargetLowering::findOptimalMemOpLowering(
     // We only need to check DstAlign here as SrcAlign is always greater or
     // equal to DstAlign (or zero).
     VT = MVT::i64;
-    while (Op.DstAlign && Op.DstAlign < VT.getSizeInBits() / 8 &&
-           !allowsMisalignedMemoryAccesses(VT, DstAS, Op.DstAlign))
+    while (Op.getDstAlign() && Op.getDstAlign() < VT.getSizeInBits() / 8 &&
+           !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
       VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
     assert(VT.isInteger());

@@ -214,7 +214,7 @@ bool TargetLowering::findOptimalMemOpLowering(
   }

   unsigned NumMemOps = 0;
-  auto Size = Op.Size;
+  auto Size = Op.size();
   while (Size != 0) {
     unsigned VTSize = VT.getSizeInBits() / 8;
     while (VTSize > Size) {
@@ -249,8 +249,8 @@ bool TargetLowering::findOptimalMemOpLowering(
       // If the new VT cannot cover all of the remaining bits, then consider
       // issuing a (or a pair of) unaligned and overlapping load / store.
       bool Fast;
-      if (NumMemOps && Op.AllowOverlap && NewVTSize < Size &&
-          allowsMisalignedMemoryAccesses(VT, DstAS, Op.DstAlign,
+      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
+          allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign(),
                                          MachineMemOperand::MONone, &Fast) &&
           Fast)
         VTSize = Size;
@@ -9436,9 +9436,9 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
   // Only use AdvSIMD to implement memset of 32-byte and above. It would have
   // taken one instruction to materialize the v2i64 zero and one store (with
   // restrictive addressing mode). Just do i64 stores.
-  bool IsSmallMemset = Op.IsMemset && Op.Size < 32;
+  bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
   auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
-    if (memOpAlign(Op.SrcAlign, Op.DstAlign, AlignCheck))
+    if (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), AlignCheck))
       return true;
     bool Fast;
     return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
@@ -9446,14 +9446,14 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
            Fast;
   };

-  if (CanUseNEON && Op.IsMemset && !IsSmallMemset &&
+  if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
       AlignmentIsAcceptable(MVT::v2i64, 16))
     return MVT::v2i64;
   if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
     return MVT::f128;
-  if (Op.Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
     return MVT::i64;
-  if (Op.Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
     return MVT::i32;
   return MVT::Other;
 }
@@ -9467,9 +9467,9 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
   // Only use AdvSIMD to implement memset of 32-byte and above. It would have
   // taken one instruction to materialize the v2i64 zero and one store (with
   // restrictive addressing mode). Just do i64 stores.
-  bool IsSmallMemset = Op.IsMemset && Op.Size < 32;
+  bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
   auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
-    if (memOpAlign(Op.SrcAlign, Op.DstAlign, AlignCheck))
+    if (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), AlignCheck))
       return true;
     bool Fast;
     return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
@@ -9477,14 +9477,14 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
            Fast;
   };

-  if (CanUseNEON && Op.IsMemset && !IsSmallMemset &&
+  if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
       AlignmentIsAcceptable(MVT::v2i64, 16))
     return LLT::vector(2, 64);
   if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
     return LLT::scalar(128);
-  if (Op.Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
     return LLT::scalar(64);
-  if (Op.Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
     return LLT::scalar(32);
   return LLT();
 }
@@ -1326,10 +1326,11 @@ EVT SITargetLowering::getOptimalMemOpType(
   // The default fallback uses the private pointer size as a guess for a type to
   // use. Make sure we switch these to 64-bit accesses.

-  if (Op.Size >= 16 && Op.DstAlign >= 4) // XXX: Should only do for global
+  if (Op.size() >= 16 &&
+      Op.getDstAlign() >= 4) // XXX: Should only do for global
     return MVT::v4i32;

-  if (Op.Size >= 8 && Op.DstAlign >= 4)
+  if (Op.size() >= 8 && Op.getDstAlign() >= 4)
     return MVT::v2i32;

   // Use the default.
@@ -15027,17 +15027,17 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
 EVT ARMTargetLowering::getOptimalMemOpType(
     const MemOp &Op, const AttributeList &FuncAttributes) const {
   // See if we can use NEON instructions for this...
-  if ((!Op.IsMemset || Op.ZeroMemset) && Subtarget->hasNEON() &&
+  if ((!Op.isMemset() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
       !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
     bool Fast;
-    if (Op.Size >= 16 &&
-        (memOpAlign(Op.SrcAlign, Op.DstAlign, 16) ||
+    if (Op.size() >= 16 &&
+        (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), 16) ||
         (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1,
                                         MachineMemOperand::MONone, &Fast) &&
          Fast))) {
       return MVT::v2f64;
-    } else if (Op.Size >= 8 &&
-               (memOpAlign(Op.SrcAlign, Op.DstAlign, 8) ||
+    } else if (Op.size() >= 8 &&
+               (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), 8) ||
                (allowsMisalignedMemoryAccesses(
                     MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) &&
                 Fast))) {
@@ -101,7 +101,7 @@ private:

   EVT getOptimalMemOpType(const MemOp &Op,
                           const AttributeList &FuncAttributes) const override {
-    return Op.Size >= 8 ? MVT::i64 : MVT::i32;
+    return Op.size() >= 8 ? MVT::i64 : MVT::i32;
   }

   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
@@ -3385,14 +3385,14 @@ EVT HexagonTargetLowering::getOptimalMemOpType(
     return (GivenA % MinA) == 0;
   };

-  if (Op.Size >= 8 && Aligned(Op.DstAlign, 8) &&
-      (Op.IsMemset || Aligned(Op.SrcAlign, 8)))
+  if (Op.size() >= 8 && Aligned(Op.getDstAlign(), 8) &&
+      (Op.isMemset() || Aligned(Op.getSrcAlign(), 8)))
     return MVT::i64;
-  if (Op.Size >= 4 && Aligned(Op.DstAlign, 4) &&
-      (Op.IsMemset || Aligned(Op.SrcAlign, 4)))
+  if (Op.size() >= 4 && Aligned(Op.getDstAlign(), 4) &&
+      (Op.isMemset() || Aligned(Op.getSrcAlign(), 4)))
     return MVT::i32;
-  if (Op.Size >= 2 && Aligned(Op.DstAlign, 2) &&
-      (Op.IsMemset || Aligned(Op.SrcAlign, 2)))
+  if (Op.size() >= 2 && Aligned(Op.getDstAlign(), 2) &&
+      (Op.isMemset() || Aligned(Op.getSrcAlign(), 2)))
     return MVT::i16;

   return MVT::Other;
@@ -15078,20 +15078,20 @@ EVT PPCTargetLowering::getOptimalMemOpType(
   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
     // When expanding a memset, require at least two QPX instructions to cover
     // the cost of loading the value to be stored from the constant pool.
-    if (Subtarget.hasQPX() && Op.Size >= 32 &&
-        (!Op.IsMemset || Op.Size >= 64) &&
-        (!Op.SrcAlign || Op.SrcAlign >= 32) &&
-        (!Op.DstAlign || Op.DstAlign >= 32) &&
+    if (Subtarget.hasQPX() && Op.size() >= 32 &&
+        (!Op.isMemset() || Op.size() >= 64) &&
+        (!Op.getSrcAlign() || Op.getSrcAlign() >= 32) &&
+        (!Op.getDstAlign() || Op.getDstAlign() >= 32) &&
         !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
       return MVT::v4f64;
     }

     // We should use Altivec/VSX loads and stores when available. For unaligned
     // addresses, unaligned VSX loads are only fast starting with the P8.
-    if (Subtarget.hasAltivec() && Op.Size >= 16 &&
-        (((!Op.SrcAlign || Op.SrcAlign >= 16) &&
-          (!Op.DstAlign || Op.DstAlign >= 16)) ||
-         ((Op.IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+    if (Subtarget.hasAltivec() && Op.size() >= 16 &&
+        (((!Op.getSrcAlign() || Op.getSrcAlign() >= 16) &&
+          (!Op.getDstAlign() || Op.getDstAlign() >= 16)) ||
+         ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
       return MVT::v4i32;
   }

@@ -2252,16 +2252,17 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
 EVT X86TargetLowering::getOptimalMemOpType(
     const MemOp &Op, const AttributeList &FuncAttributes) const {
   if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
-    if (Op.Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
-                          ((Op.DstAlign == 0 || Op.DstAlign >= 16) &&
-                           (Op.SrcAlign == 0 || Op.SrcAlign >= 16)))) {
+    if (Op.size() >= 16 &&
+        (!Subtarget.isUnalignedMem16Slow() ||
+         ((Op.getDstAlign() == 0 || Op.getDstAlign() >= 16) &&
+          (Op.getSrcAlign() == 0 || Op.getSrcAlign() >= 16)))) {
       // FIXME: Check if unaligned 64-byte accesses are slow.
-      if (Op.Size >= 64 && Subtarget.hasAVX512() &&
+      if (Op.size() >= 64 && Subtarget.hasAVX512() &&
          (Subtarget.getPreferVectorWidth() >= 512)) {
         return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
       }
       // FIXME: Check if unaligned 32-byte accesses are slow.
-      if (Op.Size >= 32 && Subtarget.hasAVX() &&
+      if (Op.size() >= 32 && Subtarget.hasAVX() &&
          (Subtarget.getPreferVectorWidth() >= 256)) {
         // Although this isn't a well-supported type for AVX1, we'll let
         // legalization and shuffle lowering produce the optimal codegen. If we
@@ -2277,8 +2278,8 @@ EVT X86TargetLowering::getOptimalMemOpType(
     if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
         (Subtarget.getPreferVectorWidth() >= 128))
       return MVT::v4f32;
-  } else if ((!Op.IsMemset || Op.ZeroMemset) && !Op.MemcpyStrSrc &&
-             Op.Size >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
+  } else if ((!Op.isMemset() || Op.isZeroMemset()) && !Op.isMemcpyStrSrc() &&
+             Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
     // Do not use f64 to lower memcpy if source is string constant. It's
     // better to use i32 to avoid the loads.
     // Also, do not use f64 to lower memset unless this is a memset of zeros.
@@ -2291,7 +2292,7 @@ EVT X86TargetLowering::getOptimalMemOpType(
   // This is a compromise. If we reach here, unaligned accesses may be slow on
   // this target. However, creating smaller, aligned accesses could be even
   // slower and would certainly be a lot more code.
-  if (Subtarget.is64Bit() && Op.Size >= 8)
+  if (Subtarget.is64Bit() && Op.size() >= 8)
     return MVT::i64;
   return MVT::i32;
 }