mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-07 04:21:27 +00:00
[SystemZ] Fix handling of 64-bit memcmp results
Generalize r188163 to cope with return types other than MVT::i32, just as the existing visitMemCmpCall code did. I've split this out into a subroutine (processIntegerCallValue) so that it can be used for other upcoming patches. I also noticed that I'd used the wrong API to record the out chain: the lowered memcmp is a load that uses DAG.getRoot() rather than getRoot(), so the out chain should go on PendingLoads. I don't have a testcase for that because we don't do any interesting scheduling on SystemZ yet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188540 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6c51f89498
commit
6a079fef4f
@ -5448,6 +5448,18 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
|
|||||||
return LoadVal;
|
return LoadVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// processIntegerCallValue - Record the value for an instruction that
|
||||||
|
/// produces an integer result, converting the type where necessary.
///
/// \param I        The instruction whose result is being recorded.
/// \param Value    The DAG value computed for \p I; it may have a different
///                 type from the one \p I's IR type maps to.
/// \param IsSigned Selects how \p Value is converted to that type: true uses
///                 sign-extend-or-truncate, false uses
///                 zero-extend-or-truncate.
|
||||||
|
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
|
||||||
|
SDValue Value,
|
||||||
|
bool IsSigned) {
|
||||||
|
// VT is the value type that the target lowering assigns to I's IR type.
EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true);
|
||||||
|
if (IsSigned)
|
||||||
|
// Signed result: sign-extend or truncate Value to VT.
Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
|
||||||
|
else
|
||||||
|
// Unsigned result: zero-extend or truncate Value to VT.
Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
|
||||||
|
// Record the converted value as the result of I.
setValue(&I, Value);
|
||||||
|
}
|
||||||
|
|
||||||
/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
|
/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
|
||||||
/// If so, return true and lower it, otherwise return false and it will be
|
/// If so, return true and lower it, otherwise return false and it will be
|
||||||
@ -5463,35 +5475,33 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
|
|||||||
!I.getType()->isIntegerTy())
|
!I.getType()->isIntegerTy())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
|
const Value *Size = I.getArgOperand(2);
|
||||||
if (Size && Size->getZExtValue() == 0) {
|
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
|
||||||
|
if (CSize && CSize->getZExtValue() == 0) {
|
||||||
EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true);
|
EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true);
|
||||||
setValue(&I, DAG.getConstant(0, CallVT));
|
setValue(&I, DAG.getConstant(0, CallVT));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Value *Arg0 = I.getArgOperand(0);
|
|
||||||
const Value *Arg1 = I.getArgOperand(1);
|
|
||||||
const Value *Arg2 = I.getArgOperand(2);
|
|
||||||
const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
|
const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
|
||||||
std::pair<SDValue, SDValue> Res =
|
std::pair<SDValue, SDValue> Res =
|
||||||
TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
|
TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
|
||||||
getValue(Arg0), getValue(Arg1), getValue(Arg2),
|
getValue(LHS), getValue(RHS), getValue(Size),
|
||||||
MachinePointerInfo(Arg0),
|
MachinePointerInfo(LHS),
|
||||||
MachinePointerInfo(Arg1));
|
MachinePointerInfo(RHS));
|
||||||
if (Res.first.getNode()) {
|
if (Res.first.getNode()) {
|
||||||
setValue(&I, Res.first);
|
processIntegerCallValue(I, Res.first, true);
|
||||||
DAG.setRoot(Res.second);
|
PendingLoads.push_back(Res.second);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
|
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
|
||||||
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
|
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
|
||||||
if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
|
if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) {
|
||||||
bool ActuallyDoIt = true;
|
bool ActuallyDoIt = true;
|
||||||
MVT LoadVT;
|
MVT LoadVT;
|
||||||
Type *LoadTy;
|
Type *LoadTy;
|
||||||
switch (Size->getZExtValue()) {
|
switch (CSize->getZExtValue()) {
|
||||||
default:
|
default:
|
||||||
LoadVT = MVT::Other;
|
LoadVT = MVT::Other;
|
||||||
LoadTy = 0;
|
LoadTy = 0;
|
||||||
@ -5499,20 +5509,20 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
|
|||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
LoadVT = MVT::i16;
|
LoadVT = MVT::i16;
|
||||||
LoadTy = Type::getInt16Ty(Size->getContext());
|
LoadTy = Type::getInt16Ty(CSize->getContext());
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
LoadVT = MVT::i32;
|
LoadVT = MVT::i32;
|
||||||
LoadTy = Type::getInt32Ty(Size->getContext());
|
LoadTy = Type::getInt32Ty(CSize->getContext());
|
||||||
break;
|
break;
|
||||||
case 8:
|
case 8:
|
||||||
LoadVT = MVT::i64;
|
LoadVT = MVT::i64;
|
||||||
LoadTy = Type::getInt64Ty(Size->getContext());
|
LoadTy = Type::getInt64Ty(CSize->getContext());
|
||||||
break;
|
break;
|
||||||
/*
|
/*
|
||||||
case 16:
|
case 16:
|
||||||
LoadVT = MVT::v4i32;
|
LoadVT = MVT::v4i32;
|
||||||
LoadTy = Type::getInt32Ty(Size->getContext());
|
LoadTy = Type::getInt32Ty(CSize->getContext());
|
||||||
LoadTy = VectorType::get(LoadTy, 4);
|
LoadTy = VectorType::get(LoadTy, 4);
|
||||||
break;
|
break;
|
||||||
*/
|
*/
|
||||||
@ -5526,7 +5536,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
|
|||||||
// supports unaligned loads of that type. Expanding into byte loads would
|
// supports unaligned loads of that type. Expanding into byte loads would
|
||||||
// bloat the code.
|
// bloat the code.
|
||||||
const TargetLowering *TLI = TM.getTargetLowering();
|
const TargetLowering *TLI = TM.getTargetLowering();
|
||||||
if (ActuallyDoIt && Size->getZExtValue() > 4) {
|
if (ActuallyDoIt && CSize->getZExtValue() > 4) {
|
||||||
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
|
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
|
||||||
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
|
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
|
||||||
if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT))
|
if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT))
|
||||||
@ -5539,8 +5549,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
|
|||||||
|
|
||||||
SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal,
|
SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal,
|
||||||
ISD::SETNE);
|
ISD::SETNE);
|
||||||
EVT CallVT = TLI->getValueType(I.getType(), true);
|
processIntegerCallValue(I, Res, false);
|
||||||
setValue(&I, DAG.getZExtOrTrunc(Res, getCurSDLoc(), CallVT));
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -543,6 +543,9 @@ private:
|
|||||||
llvm_unreachable("UserOp2 should not exist at instruction selection time!");
|
llvm_unreachable("UserOp2 should not exist at instruction selection time!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void processIntegerCallValue(const Instruction &I,
|
||||||
|
SDValue Value, bool IsSigned);
|
||||||
|
|
||||||
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
|
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
|
||||||
|
|
||||||
/// EmitFuncArgumentDbgValue - If V is a function argument then create
|
/// EmitFuncArgumentDbgValue - If V is a function argument then create
|
||||||
|
@ -339,7 +339,12 @@ static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) {
|
|||||||
static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
|
static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
|
||||||
const MachineRegisterInfo *MRI,
|
const MachineRegisterInfo *MRI,
|
||||||
const TargetRegisterInfo *TRI) {
|
const TargetRegisterInfo *TRI) {
|
||||||
|
MachineInstr *LGFR = 0;
|
||||||
MachineInstr *RLL = getDef(SrcReg, MRI);
|
MachineInstr *RLL = getDef(SrcReg, MRI);
|
||||||
|
if (RLL && RLL->getOpcode() == SystemZ::LGFR) {
|
||||||
|
LGFR = RLL;
|
||||||
|
RLL = getDef(LGFR->getOperand(1).getReg(), MRI);
|
||||||
|
}
|
||||||
if (!RLL || !isShift(RLL, SystemZ::RLL, 31))
|
if (!RLL || !isShift(RLL, SystemZ::RLL, 31))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -362,6 +367,8 @@ static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
|
|||||||
}
|
}
|
||||||
|
|
||||||
Compare->eraseFromParent();
|
Compare->eraseFromParent();
|
||||||
|
if (LGFR)
|
||||||
|
eraseIfDead(LGFR, MRI);
|
||||||
eraseIfDead(RLL, MRI);
|
eraseIfDead(RLL, MRI);
|
||||||
eraseIfDead(SRL, MRI);
|
eraseIfDead(SRL, MRI);
|
||||||
eraseIfDead(IPM, MRI);
|
eraseIfDead(IPM, MRI);
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; Test memcmp using CLC.
|
; Test memcmp using CLC, with i32 results.
|
||||||
;
|
;
|
||||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
||||||
|
|
||||||
|
135
test/CodeGen/SystemZ/memcmp-02.ll
Normal file
135
test/CodeGen/SystemZ/memcmp-02.ll
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
; Test memcmp using CLC, with i64 results.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
||||||
|
|
||||||
|
declare i64 @memcmp(i8 *%src1, i8 *%src2, i64 %size)
|
||||||
|
|
||||||
|
; Zero-length comparisons should be optimized away.
|
||||||
|
define i64 @f1(i8 *%src1, i8 *%src2) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: lghi %r2, 0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 0)
|
||||||
|
ret i64 %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check a case where the result is used as an integer.
|
||||||
|
define i64 @f2(i8 *%src1, i8 *%src2) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: clc 0(2,%r2), 0(%r3)
|
||||||
|
; CHECK: ipm [[REG:%r[0-5]]]
|
||||||
|
; CHECK: srl [[REG]], 28
|
||||||
|
; CHECK: rll [[REG]], [[REG]], 31
|
||||||
|
; CHECK: lgfr %r2, [[REG]]
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 2)
|
||||||
|
ret i64 %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check a case where the result is tested for equality.
|
||||||
|
define void @f3(i8 *%src1, i8 *%src2, i64 *%dest) {
|
||||||
|
; CHECK-LABEL: f3:
|
||||||
|
; CHECK: clc 0(3,%r2), 0(%r3)
|
||||||
|
; CHECK-NEXT: je {{\..*}}
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 3)
|
||||||
|
%cmp = icmp eq i64 %res, 0
|
||||||
|
br i1 %cmp, label %exit, label %store
|
||||||
|
|
||||||
|
store:
|
||||||
|
store i64 0, i64 *%dest
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check a case where the result is tested for inequality.
|
||||||
|
define void @f4(i8 *%src1, i8 *%src2, i64 *%dest) {
|
||||||
|
; CHECK-LABEL: f4:
|
||||||
|
; CHECK: clc 0(4,%r2), 0(%r3)
|
||||||
|
; CHECK-NEXT: jlh {{\..*}}
|
||||||
|
; CHECK: br %r14
|
||||||
|
entry:
|
||||||
|
%res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 4)
|
||||||
|
%cmp = icmp ne i64 %res, 0
|
||||||
|
br i1 %cmp, label %exit, label %store
|
||||||
|
|
||||||
|
store:
|
||||||
|
store i64 0, i64 *%dest
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check a case where the result is tested via slt.
|
||||||
|
define void @f5(i8 *%src1, i8 *%src2, i64 *%dest) {
|
||||||
|
; CHECK-LABEL: f5:
|
||||||
|
; CHECK: clc 0(5,%r2), 0(%r3)
|
||||||
|
; CHECK-NEXT: jl {{\..*}}
|
||||||
|
; CHECK: br %r14
|
||||||
|
entry:
|
||||||
|
%res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 5)
|
||||||
|
%cmp = icmp slt i64 %res, 0
|
||||||
|
br i1 %cmp, label %exit, label %store
|
||||||
|
|
||||||
|
store:
|
||||||
|
store i64 0, i64 *%dest
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check a case where the result is tested for sgt.
|
||||||
|
define void @f6(i8 *%src1, i8 *%src2, i64 *%dest) {
|
||||||
|
; CHECK-LABEL: f6:
|
||||||
|
; CHECK: clc 0(6,%r2), 0(%r3)
|
||||||
|
; CHECK-NEXT: jh {{\..*}}
|
||||||
|
; CHECK: br %r14
|
||||||
|
entry:
|
||||||
|
%res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 6)
|
||||||
|
%cmp = icmp sgt i64 %res, 0
|
||||||
|
br i1 %cmp, label %exit, label %store
|
||||||
|
|
||||||
|
store:
|
||||||
|
store i64 0, i64 *%dest
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check the upper end of the CLC range. Here the result is used both as
|
||||||
|
; an integer and for branching.
|
||||||
|
define i64 @f7(i8 *%src1, i8 *%src2, i64 *%dest) {
|
||||||
|
; CHECK-LABEL: f7:
|
||||||
|
; CHECK: clc 0(256,%r2), 0(%r3)
|
||||||
|
; CHECK: ipm [[REG:%r[0-5]]]
|
||||||
|
; CHECK: srl [[REG]], 28
|
||||||
|
; CHECK: rll [[REG]], [[REG]], 31
|
||||||
|
; CHECK: lgfr %r2, [[REG]]
|
||||||
|
; The branch target is a local label; use the same pattern as f3-f6 above.
; CHECK: jl {{\..*}}
|
||||||
|
; CHECK: br %r14
|
||||||
|
entry:
|
||||||
|
%res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 256)
|
||||||
|
%cmp = icmp slt i64 %res, 0
|
||||||
|
br i1 %cmp, label %exit, label %store
|
||||||
|
|
||||||
|
store:
|
||||||
|
store i64 0, i64 *%dest
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret i64 %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; 257 bytes is too big for a single CLC. For now expect a call instead.
|
||||||
|
define i64 @f8(i8 *%src1, i8 *%src2) {
|
||||||
|
; CHECK-LABEL: f8:
|
||||||
|
; CHECK: brasl %r14, memcmp@PLT
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 257)
|
||||||
|
ret i64 %res
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user