[SystemZ] Fix ABI for i128 argument and return types

According to the SystemZ ABI, 128-bit integer types should be passed and returned via implicit reference. However, this is not currently implemented at the LLVM IR level for the i128 type. This does not matter when compiling C/C++ code, since clang will implement the implicit reference itself. However, it turns out that when calling libgcc helper routines operating on 128-bit integers, LLVM will use i128 argument and return value types; the resulting code is not compatible with the ABI used in libgcc, leading to crashes (see PR26559). This should be simple to fix, except that i128 currently is not even a legal type for the SystemZ back end. Therefore, common code will already split arguments and return values into multiple parts. The bulk of this patch therefore consists of detecting such parts, and correctly handling passing via implicit reference of a value split into multiple parts. If at some time in the future, i128 becomes a legal type, this code can be removed again. This fixes PR26559. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@261325 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-08 03:01:58 +00:00 · 2016-02-19 14:10:21 +00:00 · 2016-02-19 14:10:21 +00:00 · c80aad901b
commit c80aad901b
parent fe6345c537
6 changed files with 198 additions and 10 deletions
--- a/lib/Target/SystemZ/SystemZCallingConv.cpp
+++ b/lib/Target/SystemZ/SystemZCallingConv.cpp
@ -12,10 +12,10 @@

 using namespace llvm;

-const unsigned SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = {
+const MCPhysReg SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = {
  SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, SystemZ::R5D, SystemZ::R6D
 };

-const unsigned SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = {
+const MCPhysReg SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = {
  SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
 };
--- a/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/lib/Target/SystemZ/SystemZCallingConv.h
@ -12,14 +12,15 @@

 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/MC/MCRegisterInfo.h"

 namespace llvm {
 namespace SystemZ {
  const unsigned NumArgGPRs = 5;
-  extern const unsigned ArgGPRs[NumArgGPRs];
+  extern const MCPhysReg ArgGPRs[NumArgGPRs];

  const unsigned NumArgFPRs = 4;
-  extern const unsigned ArgFPRs[NumArgFPRs];
+  extern const MCPhysReg ArgFPRs[NumArgFPRs];
 } // end namespace SystemZ

 class SystemZCCState : public CCState {
@ -79,6 +80,51 @@ public:
  bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; }
 };

+// Custom handler for i128 arguments, which are passed by implicit
+// reference.  A .td rule "CCIfType<[i128], CCPassIndirect<i64>>" cannot
+// be used: i128 is not a legal type, so common code has already split
+// it into a pair of i64 parts.  Returns false for values that are not such
+// a part; otherwise records the part (LocVT i64, Indirect) and returns true.
+inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
+                                    MVT &LocVT,
+                                    CCValAssign::LocInfo &LocInfo,
+                                    ISD::ArgFlagsTy &ArgFlags,
+                                    CCState &State) {
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // ArgFlags.isSplit() is true on the first part of an i128 argument;
+  // PendingMembers.empty() is false on all subsequent parts.
+  if (!ArgFlags.isSplit() && PendingMembers.empty())
+    return false;  // Not a part of a split argument.
+
+  // Queue this part as a pending Indirect location with an i64 LocVT.
+  LocVT = MVT::i64;
+  LocInfo = CCValAssign::Indirect;
+  PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT,
+                                                   LocVT, LocInfo));
+  if (!ArgFlags.isSplitEnd())
+    return true;  // More parts follow; locations are assigned on the last.
+
+  // OK, we've collected all parts in the pending list.  Allocate the
+  // location for the indirect pointer: an argument GPR if AllocateReg
+  // yields one, else a stack slot (duplicating the usual i64 rules).
+  unsigned Reg = State.AllocateReg(SystemZ::ArgGPRs);
+  unsigned Offset = Reg ? 0 : State.AllocateStack(8, 8);
+
+  // Point every pending part at that same location and commit it to State.
+  for (auto &It : PendingMembers) {
+    if (Reg)
+      It.convertToReg(Reg);
+    else
+      It.convertToMem(Offset);
+    State.addLoc(It);
+  }
+
+  PendingMembers.clear();  // Leave the list empty for the next argument.
+
+  return true;
+}
+
 } // end namespace llvm

 #endif
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@ -67,6 +67,9 @@ def CC_SystemZ : CallingConv<[

  // Force long double values to the stack and pass i64 pointers to them.
  CCIfType<[f128], CCPassIndirect<i64>>,
+  // Same for i128 values.  These are already split into two i64 here,
+  // so we have to use a custom handler.
+  CCIfType<[i64], CCCustom<"CC_SystemZ_I128Indirect">>,

  // The first 5 integer arguments are passed in R2-R6.  Note that R6
  // is call-saved.
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@ -813,9 +813,6 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
-  else if (VA.getLocInfo() == CCValAssign::Indirect)
-    Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
-                        MachinePointerInfo(), false, false, false, 0);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
@ -868,6 +865,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL =
      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Detect unsupported vector argument types.
  if (Subtarget.hasVector())
@ -930,7 +928,6 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
-      EVT PtrVT = getPointerTy(DAG.getDataLayout());
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
@ -942,7 +939,26 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,

    // Convert the value of the argument register into the value that's
    // being passed.
-    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
+    if (VA.getLocInfo() == CCValAssign::Indirect) {
+      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain,
+                                   ArgValue, MachinePointerInfo(),
+                                   false, false, false, 0));
+      // If the original argument was split (e.g. i128), we need
+      // to load all parts of it here (using the same address).
+      unsigned ArgIndex = Ins[I].OrigArgIndex;
+      assert (Ins[I].PartOffset == 0);
+      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
+        CCValAssign &PartVA = ArgLocs[I + 1];
+        unsigned PartOffset = Ins[I + 1].PartOffset;
+        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
+                                      DAG.getIntPtrConstant(PartOffset, DL));
+        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain,
+                                     Address, MachinePointerInfo(),
+                                     false, false, false, 0));
+        ++I;
+      }
+    } else
+      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg) {
@ -1054,11 +1070,25 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
-      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(DAG.getStore(
          Chain, DL, ArgValue, SpillSlot,
          MachinePointerInfo::getFixedStack(MF, FI), false, false, 0));
+      // If the original argument was split (e.g. i128), we need
+      // to store all parts of it here (and pass just one address).
+      unsigned ArgIndex = Outs[I].OrigArgIndex;
+      assert (Outs[I].PartOffset == 0);
+      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
+        SDValue PartValue = OutVals[I + 1];
+        unsigned PartOffset = Outs[I + 1].PartOffset;
+        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
+                                      DAG.getIntPtrConstant(PartOffset, DL));
+        MemOpChains.push_back(DAG.getStore(
+            Chain, DL, PartValue, Address,
+            MachinePointerInfo::getFixedStack(MF, FI), false, false, 0));
+        ++I;
+      }
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
@ -1180,6 +1210,12 @@ CanLowerReturn(CallingConv::ID CallConv,
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

+  // Special case that we cannot easily detect in RetCC_SystemZ since
+  // i128 is not a legal type.
+  for (auto &Out : Outs)
+    if (Out.ArgVT == MVT::i128)
+      return false;
+
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
--- a/test/CodeGen/SystemZ/args-09.ll
+++ b/test/CodeGen/SystemZ/args-09.ll
@ -0,0 +1,53 @@
+; Test the handling of i128 argument values
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-INT
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-I128-1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-I128-2
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-STACK
+
+declare void @bar(i64, i64, i64, i64, i128,
+                  i64, i64, i64, i64, i128)
+
+; There are two indirect i128 slots, one at offset 200 (the first available
+; byte after the outgoing arguments) and one immediately after it at 216.
+; These slots should be set up outside the glued call sequence.  Both
+; values are the constant 0, so each slot is filled with a pair of mvghi
+; stores (one per 64-bit half); the I128 tests hard-code those offsets.
+;
+; The order of the CHECK-STACK stores doesn't matter.  It would be OK to reorder
+; them in response to future code changes.
+define void @foo() {
+; CHECK-INT-LABEL: foo:
+; CHECK-INT-DAG: lghi %r2, 1
+; CHECK-INT-DAG: lghi %r3, 2
+; CHECK-INT-DAG: lghi %r4, 3
+; CHECK-INT-DAG: lghi %r5, 4
+; CHECK-INT-DAG: la %r6, {{200|216}}(%r15)
+; CHECK-INT: brasl %r14, bar@PLT
+;
+; CHECK-I128-1-LABEL: foo:
+; CHECK-I128-1: aghi %r15, -232
+; CHECK-I128-1-DAG: mvghi 200(%r15), 0
+; CHECK-I128-1-DAG: mvghi 208(%r15), 0
+; CHECK-I128-1: brasl %r14, bar@PLT
+;
+; CHECK-I128-2-LABEL: foo:
+; CHECK-I128-2: aghi %r15, -232
+; CHECK-I128-2-DAG: mvghi 216(%r15), 0
+; CHECK-I128-2-DAG: mvghi 224(%r15), 0
+; CHECK-I128-2: brasl %r14, bar@PLT
+;
+; CHECK-STACK-LABEL: foo:
+; CHECK-STACK: aghi %r15, -232
+; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{200|216}}(%r15)
+; CHECK-STACK: stg [[REGISTER]], 192(%r15)
+; CHECK-STACK: mvghi 184(%r15), 8
+; CHECK-STACK: mvghi 176(%r15), 7
+; CHECK-STACK: mvghi 168(%r15), 6
+; CHECK-STACK: mvghi 160(%r15), 5
+; CHECK-STACK: brasl %r14, bar@PLT
+
+  call void @bar (i64 1, i64 2, i64 3, i64 4, i128 0,
+                  i64 5, i64 6, i64 7, i64 8, i128 0)
+  ret void
+}
--- a/test/CodeGen/SystemZ/args-10.ll
+++ b/test/CodeGen/SystemZ/args-10.ll
@ -0,0 +1,50 @@
+; Test incoming i128 arguments.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Do some arithmetic so that we can see the register being used.
+define void @f1(i128 *%r2, i16 %r3, i32 %r4, i64 %r5, i128 %r6) {
+; CHECK-LABEL: f1:
+; CHECK-DAG:  lg [[REGL:%r[0-5]+]], 8(%r6)
+; CHECK-DAG:  lg [[REGH:%r[0-5]+]], 0(%r6)
+; CHECK:      algr [[REGL]], [[REGL]]
+; CHECK-NEXT: alcgr [[REGH]], [[REGH]]
+; CHECK-DAG:  stg [[REGL]], 8(%r2)
+; CHECK-DAG:  stg [[REGH]], 0(%r2)
+; CHECK:      br %r14
+  %y = add i128 %r6, %r6
+  store i128 %y, i128 *%r2
+  ret void
+}
+
+; Test a case where the i128 address is passed on the stack.
+define void @f2(i128 *%r2, i16 %r3, i32 %r4, i64 %r5,
+                i128 %r6, i64 %s1, i64 %s2, i128 %s4) {
+; CHECK-LABEL: f2:
+; CHECK:      lg [[ADDR:%r[1-5]+]], 176(%r15)
+; CHECK-DAG:  lg [[REGL:%r[0-5]+]], 8([[ADDR]])
+; CHECK-DAG:  lg [[REGH:%r[0-5]+]], 0([[ADDR]])
+; CHECK:      algr [[REGL]], [[REGL]]
+; CHECK-NEXT: alcgr [[REGH]], [[REGH]]
+; CHECK-DAG:  stg [[REGL]], 8(%r2)
+; CHECK-DAG:  stg [[REGH]], 0(%r2)
+; CHECK:      br %r14
+  %y = add i128 %s4, %s4
+  store i128 %y, i128 *%r2
+  ret void
+}
+
+; Explicit i128 return values are likewise passed indirectly.
+define i128 @f14(i128 %r3) {
+; CHECK-LABEL: f14:
+; CHECK-DAG:  lg [[REGL:%r[0-5]+]], 8(%r3)
+; CHECK-DAG:  lg [[REGH:%r[0-5]+]], 0(%r3)
+; CHECK:      algr [[REGL]], [[REGL]]
+; CHECK-NEXT: alcgr [[REGH]], [[REGH]]
+; CHECK-DAG:  stg [[REGL]], 8(%r2)
+; CHECK-DAG:  stg [[REGH]], 0(%r2)
+; CHECK:      br %r14
+  %y = add i128 %r3, %r3
+  ret i128 %y
+}
+