From 6a079fef4fad3e6c2e07c9e1d0776e20a0b05b1e Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 16 Aug 2013 10:55:47 +0000 Subject: [PATCH] [SystemZ] Fix handling of 64-bit memcmp results Generalize r188163 to cope with return types other than MVT::i32, just as the existing visitMemCmpCall code did. I've split this out into a subroutine so that it can be used for other upcoming patches. I also noticed that I'd used the wrong API to record the out chain. It's a load that uses DAG.getRoot() rather than getRoot(), so the out chain should go on PendingLoads. I don't have a testcase for that because we don't do any interesting scheduling on z yet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188540 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/SelectionDAGBuilder.cpp | 47 +++--- .../SelectionDAG/SelectionDAGBuilder.h | 3 + lib/Target/SystemZ/SystemZInstrInfo.cpp | 7 + test/CodeGen/SystemZ/memcmp-01.ll | 2 +- test/CodeGen/SystemZ/memcmp-02.ll | 135 ++++++++++++++++++ 5 files changed, 174 insertions(+), 20 deletions(-) create mode 100644 test/CodeGen/SystemZ/memcmp-02.ll diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e97d09aeb2a..7a9a4d77d2d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5448,6 +5448,18 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, return LoadVal; } +/// processIntegerCallValue - Record the value for an instruction that +/// produces an integer result, converting the type where necessary. +void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, + SDValue Value, + bool IsSigned) { + EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true); + if (IsSigned) + Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); + else + Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); + setValue(&I, Value); +} /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be @@ -5463,35 +5475,33 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { !I.getType()->isIntegerTy()) return false; - const ConstantInt *Size = dyn_cast(I.getArgOperand(2)); - if (Size && Size->getZExtValue() == 0) { + const Value *Size = I.getArgOperand(2); + const ConstantInt *CSize = dyn_cast(Size); + if (CSize && CSize->getZExtValue() == 0) { EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true); setValue(&I, DAG.getConstant(0, CallVT)); return true; } - const Value *Arg0 = I.getArgOperand(0); - const Value *Arg1 = I.getArgOperand(1); - const Value *Arg2 = I.getArgOperand(2); const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), - getValue(Arg0), getValue(Arg1), getValue(Arg2), - MachinePointerInfo(Arg0), - MachinePointerInfo(Arg1)); + getValue(LHS), getValue(RHS), getValue(Size), + MachinePointerInfo(LHS), + MachinePointerInfo(RHS)); if (Res.first.getNode()) { - setValue(&I, Res.first); - DAG.setRoot(Res.second); + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); return true; } // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { + if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { bool ActuallyDoIt = true; MVT LoadVT; Type *LoadTy; - switch (Size->getZExtValue()) { + switch (CSize->getZExtValue()) { default: LoadVT = MVT::Other; LoadTy = 0; @@ -5499,20 +5509,20 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { break; case 2: LoadVT = MVT::i16; - LoadTy = Type::getInt16Ty(Size->getContext()); + LoadTy = Type::getInt16Ty(CSize->getContext()); break; case 4: LoadVT = MVT::i32; - LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = Type::getInt32Ty(CSize->getContext()); break; case 8: LoadVT = MVT::i64; - LoadTy = Type::getInt64Ty(Size->getContext()); + LoadTy = Type::getInt64Ty(CSize->getContext()); break; /* case 16: LoadVT = MVT::v4i32; - LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = Type::getInt32Ty(CSize->getContext()); LoadTy = VectorType::get(LoadTy, 4); break; */ @@ -5526,7 +5536,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // supports unaligned loads of that type. Expanding into byte loads would // bloat the code. const TargetLowering *TLI = TM.getTargetLowering(); - if (ActuallyDoIt && Size->getZExtValue() > 4) { + if (ActuallyDoIt && CSize->getZExtValue() > 4) { // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) @@ -5539,8 +5549,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, ISD::SETNE); - EVT CallVT = TLI->getValueType(I.getType(), true); - setValue(&I, DAG.getZExtOrTrunc(Res, getCurSDLoc(), CallVT)); + processIntegerCallValue(I, Res, false); return true; } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 82ac0ed69e2..217d2f9e6ac 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -543,6 +543,9 @@ private: llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } + void processIntegerCallValue(const Instruction &I, + SDValue Value, bool IsSigned); + void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); /// EmitFuncArgumentDbgValue - If V is an function argument then create diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index f2e877fd29b..a84ce28c514 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -339,7 +339,12 @@ static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) { static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI) { + MachineInstr *LGFR = 0; MachineInstr *RLL = getDef(SrcReg, MRI); + if (RLL && RLL->getOpcode() == SystemZ::LGFR) { + LGFR = RLL; + RLL = getDef(LGFR->getOperand(1).getReg(), MRI); + } if (!RLL || !isShift(RLL, SystemZ::RLL, 31)) return false; @@ -362,6 +367,8 @@ static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg, } Compare->eraseFromParent(); + if (LGFR) + eraseIfDead(LGFR, MRI); eraseIfDead(RLL, MRI); eraseIfDead(SRL, MRI); eraseIfDead(IPM, MRI); diff --git a/test/CodeGen/SystemZ/memcmp-01.ll b/test/CodeGen/SystemZ/memcmp-01.ll index 4d8cd14880a..5f5752b336d 100644 --- a/test/CodeGen/SystemZ/memcmp-01.ll +++ b/test/CodeGen/SystemZ/memcmp-01.ll @@ -1,4 +1,4 @@ -; Test memcmp using CLC. +; Test memcmp using CLC, with i32 results. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s diff --git a/test/CodeGen/SystemZ/memcmp-02.ll b/test/CodeGen/SystemZ/memcmp-02.ll new file mode 100644 index 00000000000..cae3d3d4943 --- /dev/null +++ b/test/CodeGen/SystemZ/memcmp-02.ll @@ -0,0 +1,135 @@ +; Test memcmp using CLC, with i64 results. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @memcmp(i8 *%src1, i8 *%src2, i64 %size) + +; Zero-length comparisons should be optimized away. +define i64 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK: lghi %r2, 0 +; CHECK: br %r14 + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 0) + ret i64 %res +} + +; Check a case where the result is used as an integer. +define i64 @f2(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f2: +; CHECK: clc 0(2,%r2), 0(%r3) +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: lgfr %r2, [[REG]] +; CHECK: br %r14 + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 2) + ret i64 %res +} + +; Check a case where the result is tested for equality. +define void @f3(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f3: +; CHECK: clc 0(3,%r2), 0(%r3) +; CHECK-NEXT: je {{\..*}} +; CHECK: br %r14 + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 3) + %cmp = icmp eq i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for inequality. +define void @f4(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f4: +; CHECK: clc 0(4,%r2), 0(%r3) +; CHECK-NEXT: jlh {{\..*}} +; CHECK: br %r14 +entry: + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 4) + %cmp = icmp ne i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested via slt. +define void @f5(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f5: +; CHECK: clc 0(5,%r2), 0(%r3) +; CHECK-NEXT: jl {{\..*}} +; CHECK: br %r14 +entry: + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 5) + %cmp = icmp slt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for sgt. +define void @f6(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f6: +; CHECK: clc 0(6,%r2), 0(%r3) +; CHECK-NEXT: jh {{\..*}} +; CHECK: br %r14 +entry: + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 6) + %cmp = icmp sgt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Check the upper end of the CLC range. Here the result is used both as +; an integer and for branching. +define i64 @f7(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f7: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: lgfr %r2, [[REG]] +; CHECK: jl {{.L*}} +; CHECK: br %r14 +entry: + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 256) + %cmp = icmp slt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; 257 bytes is too big for a single CLC. For now expect a call instead. +define i64 @f8(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f8: +; CHECK: brasl %r14, memcmp@PLT +; CHECK: br %r14 + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 257) + ret i64 %res +}