[SystemZ] Use SRST to implement strlen and strnlen

It would also make sense to use it for memchr; I'm working on that now.

llvm-svn: 188547
This commit is contained in:
Richard Sandiford 2013-08-16 11:41:43 +00:00
parent 93a75a2a56
commit 06a13f49c8
14 changed files with 236 additions and 1 deletions

View File

@ -700,7 +700,7 @@ public:
case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l:
case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l:
case LibFunc::memcmp: case LibFunc::strcmp: case LibFunc::strcpy:
case LibFunc::stpcpy:
case LibFunc::stpcpy: case LibFunc::strlen: case LibFunc::strnlen:
return true;
}
return false;

View File

@ -137,6 +137,19 @@ public:
MachinePointerInfo Op2PtrInfo) const {
return std::make_pair(SDValue(), SDValue());
}
/// EmitTargetCodeForStrlen - Hook that lets a target expand a strlen call
/// inline instead of emitting a call to the library routine.  \p Src is
/// the address of the string and \p Chain is the incoming chain.
///
/// On success the first element of the returned pair is the computed
/// length and the second is the output chain.  This default
/// implementation declines by returning a pair of null SDValues.
virtual std::pair<SDValue, SDValue>
EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
                        SDValue Src, MachinePointerInfo SrcPtrInfo) const {
  // No target-specific expansion by default.
  return std::pair<SDValue, SDValue>(SDValue(), SDValue());
}
/// EmitTargetCodeForStrnlen - Hook that lets a target expand a strnlen
/// call inline instead of emitting a call to the library routine.  \p Src
/// is the address of the string, \p MaxLength is the bound passed to
/// strnlen and \p Chain is the incoming chain.
///
/// On success the first element of the returned pair is the computed
/// length and the second is the output chain.  This default
/// implementation declines by returning a pair of null SDValues.
virtual std::pair<SDValue, SDValue>
EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
                         SDValue Src, SDValue MaxLength,
                         MachinePointerInfo SrcPtrInfo) const {
  // No target-specific expansion by default.
  return std::pair<SDValue, SDValue>(SDValue(), SDValue());
}
};
} // end llvm namespace

View File

@ -5616,6 +5616,59 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
return false;
}
/// visitStrLenCall -- See if we can lower a strlen call into an optimized
/// form. If so, return true and lower it, otherwise return false and it
/// will be lowered like a normal call.
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
// Verify that the prototype makes sense. size_t strlen(char *)
if (I.getNumArgOperands() != 1)
return false;
const Value *Arg0 = I.getArgOperand(0);
if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy())
return false;
const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), MachinePointerInfo(Arg0));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, false);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized
/// form. If so, return true and lower it, otherwise return false and it
/// will be lowered like a normal call.
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
// Verify that the prototype makes sense. size_t strnlen(char *, size_t)
if (I.getNumArgOperands() != 2)
return false;
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
if (!Arg0->getType()->isPointerTy() ||
!Arg1->getType()->isIntegerTy() ||
!I.getType()->isIntegerTy())
return false;
const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, false);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// visitUnaryFloatCall - If a call instruction is a unary floating-point
/// operation (as expected), translate it to an SDNode with the specified opcode
/// and return true.
@ -5774,6 +5827,14 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitStrCmpCall(I))
return;
break;
case LibFunc::strlen:
if (visitStrLenCall(I))
return;
break;
case LibFunc::strnlen:
if (visitStrNLenCall(I))
return;
break;
}
}
}

View File

@ -525,6 +525,8 @@ private:
bool visitMemCmpCall(const CallInst &I);
bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
bool visitStrCmpCall(const CallInst &I);
bool visitStrLenCall(const CallInst &I);
bool visitStrNLenCall(const CallInst &I);
bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
void visitAtomicLoad(const LoadInst &I);
void visitAtomicStore(const StoreInst &I);

View File

@ -1704,6 +1704,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(CLC);
OPCODE(STRCMP);
OPCODE(STPCPY);
OPCODE(SEARCH_STRING);
OPCODE(IPM);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
@ -2554,6 +2555,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
return emitStringWrapper(MI, MBB, SystemZ::CLST);
case SystemZ::MVSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::MVST);
case SystemZ::SRSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::SRST);
default:
llvm_unreachable("Unexpected instr type to insert");
}

View File

@ -91,6 +91,12 @@ namespace SystemZISD {
// are the addresses of the strings to compare.
STRCMP,
// Use an SRST-based sequence to search a block of memory. The first
// operand is the end address, the second is the start, and the third
// is the character to search for. CC is set to 1 on success and 2
// on failure.
SEARCH_STRING,
// Store the CC value in bits 29 and 28 of an integer.
IPM,

View File

@ -1163,6 +1163,10 @@ let usesCustomInserter = 1 in {
def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
}
// Search a block of memory for a character.  The instruction may load
// from memory, sets CC and reads R0W (presumably the character being
// searched for -- confirm against the architecture documentation).
let mayLoad = 1, Defs = [CC], Uses = [R0W] in
defm SRST : StringRRE<"srst", 0xb25e, z_search_string>;
//===----------------------------------------------------------------------===//
// Peepholes.
//===----------------------------------------------------------------------===//

View File

@ -123,6 +123,8 @@ def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic,
[SDNPInGlue]>;

View File

@ -181,3 +181,37 @@ EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Glue = Chain.getValue(2);
return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
}
// Emit a SEARCH_STRING node that scans forward from Src for a null
// character, giving up once the scan reaches Limit.  The returned pair
// holds the number of nonnull characters first and the output chain
// second.
//
// Passing a Limit of 0 makes this usable for plain strlen.
static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL,
                                                    SDValue Chain, SDValue Src,
                                                    SDValue Limit) {
  EVT AddrVT = Src.getValueType();
  SDVTList ResultVTs = DAG.getVTList(AddrVT, MVT::Other, MVT::Glue);
  SDValue NullChar = DAG.getConstant(0, MVT::i32);
  // Operands after the chain: end address, start address, character.
  SDValue Found = DAG.getNode(SystemZISD::SEARCH_STRING, DL, ResultVTs, Chain,
                              Limit, Src, NullChar);
  // The length is the address produced by the search minus the start.
  SDValue Length = DAG.getNode(ISD::SUB, DL, AddrVT, Found, Src);
  // Result 1 of the search node is its output chain.
  return std::make_pair(Length, Found.getValue(1));
}
// Expand strlen as a bounded search with a limit of 0, which
// getBoundedStrlen is called with when no bound applies.  Returns the
// length and the output chain.
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
                        SDValue Src, MachinePointerInfo SrcPtrInfo) const {
  SDValue NoLimit = DAG.getConstant(0, Src.getValueType());
  return getBoundedStrlen(DAG, DL, Chain, Src, NoLimit);
}
// Expand strnlen as a bounded search that stops at Src + MaxLength.
// Returns the length and the output chain.
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
                         SDValue Src, SDValue MaxLength,
                         MachinePointerInfo SrcPtrInfo) const {
  EVT AddrVT = Src.getValueType();
  // Bring the bound to the width of the address before adding it.
  SDValue Count = DAG.getZExtOrTrunc(MaxLength, DL, AddrVT);
  SDValue EndAddr = DAG.getNode(ISD::ADD, DL, AddrVT, Src, Count);
  return getBoundedStrlen(DAG, DL, Chain, Src, EndAddr);
}

View File

@ -58,6 +58,16 @@ public:
SDValue Src1, SDValue Src2,
MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE;
virtual std::pair<SDValue, SDValue>
EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Src, MachinePointerInfo SrcPtrInfo) const
LLVM_OVERRIDE;
virtual std::pair<SDValue, SDValue>
EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Src, SDValue MaxLength,
MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE;
};
}

View File

@ -0,0 +1,39 @@
; Test strlen using SRST, i64 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare i64 @strlen(i8 *%src)
declare i64 @strnlen(i8 *%src, i64 %len)
; Test strlen with its proper i64 prototype. It would also be valid for
; the uses of %r3 and REG after the LGR to be swapped.
;
; Expected sequence: %r0 is set to 0, the character searched for, and
; %r2 is set to 0, the end address used when there is no bound.  SRST
; works on a copy of %src, and the result is the address left in %r2
; minus %src (still in %r3).  The JO back to the label presumably
; resumes a search that SRST left unfinished -- confirm against the
; architecture documentation.
define i64 @f1(i32 %dummy, i8 *%src) {
; CHECK-LABEL: f1:
; CHECK-DAG: lhi %r0, 0
; CHECK-DAG: lghi %r2, 0
; CHECK-DAG: lgr [[REG:%r[145]]], %r3
; CHECK: [[LABEL:\.[^:]*]]:
; CHECK-NEXT: srst %r2, [[REG]]
; CHECK-NEXT: jo [[LABEL]]
; CHECK-NEXT: BB#{{[0-9]+}}
; CHECK-NEXT: sgr %r2, %r3
; CHECK: br %r14
%res = call i64 @strlen(i8 *%src)
ret i64 %res
}
; Test strnlen with its proper i64 prototype.
;
; Expected sequence: AGR adds %src (in %r3) to %len (in %r2) to form the
; end address of the search, and %r0 is set to 0, the character searched
; for.  SRST works on a copy of %src, and the result is the address left
; in %r2 minus %src.  The JO back to the label presumably resumes a
; search that SRST left unfinished -- confirm against the architecture
; documentation.
define i64 @f2(i64 %len, i8 *%src) {
; CHECK-LABEL: f2:
; CHECK-DAG: agr %r2, %r3
; CHECK-DAG: lhi %r0, 0
; CHECK-DAG: lgr [[REG:%r[145]]], %r3
; CHECK: [[LABEL:\.[^:]*]]:
; CHECK-NEXT: srst %r2, [[REG]]
; CHECK-NEXT: jo [[LABEL]]
; CHECK-NEXT: BB#{{[0-9]+}}
; CHECK-NEXT: sgr %r2, %r3
; CHECK: br %r14
%res = call i64 @strnlen(i8 *%src, i64 %len)
ret i64 %res
}

View File

@ -0,0 +1,39 @@
; Test strlen using SRST, i32 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare i32 @strlen(i8 *%src)
declare i32 @strnlen(i8 *%src, i32 %len)
; Test strlen with an i32-based prototype. It would also be valid for
; the uses of %r3 and REG after the LGR to be swapped.
;
; Although the call returns i32, the expected code still uses 64-bit
; register operations (LGHI, LGR, SGR).  %r0 is set to 0, the character
; searched for, and %r2 is set to 0, the end address used when there is
; no bound.  The result is the address left in %r2 minus %src (still in
; %r3).  The JO back to the label presumably resumes a search that SRST
; left unfinished -- confirm against the architecture documentation.
define i32 @f1(i32 %dummy, i8 *%src) {
; CHECK-LABEL: f1:
; CHECK-DAG: lhi %r0, 0
; CHECK-DAG: lghi %r2, 0
; CHECK-DAG: lgr [[REG:%r[145]]], %r3
; CHECK: [[LABEL:\.[^:]*]]:
; CHECK-NEXT: srst %r2, [[REG]]
; CHECK-NEXT: jo [[LABEL]]
; CHECK-NEXT: BB#{{[0-9]+}}
; CHECK-NEXT: sgr %r2, %r3
; CHECK: br %r14
%res = call i32 @strlen(i8 *%src)
ret i32 %res
}
; Test strnlen with an i32-based prototype.
;
; %len is marked zeroext, so presumably it arrives already zero-extended
; and can be added to %src directly -- confirm against the calling
; convention.  AGR forms the end address from %len (in %r2) and %src (in
; %r3), %r0 is set to 0 (the character searched for), and the result is
; the address left in %r2 minus %src.  The JO back to the label
; presumably resumes a search that SRST left unfinished.
define i32 @f2(i32 zeroext %len, i8 *%src) {
; CHECK-LABEL: f2:
; CHECK-DAG: agr %r2, %r3
; CHECK-DAG: lhi %r0, 0
; CHECK-DAG: lgr [[REG:%r[145]]], %r3
; CHECK: [[LABEL:\.[^:]*]]:
; CHECK-NEXT: srst %r2, [[REG]]
; CHECK-NEXT: jo [[LABEL]]
; CHECK-NEXT: BB#{{[0-9]+}}
; CHECK-NEXT: sgr %r2, %r3
; CHECK: br %r14
%res = call i32 @strnlen(i8 *%src, i32 %len)
ret i32 %res
}

View File

@ -6253,6 +6253,18 @@
# CHECK: srk %r2, %r3, %r4
0xb9 0xf9 0x40 0x23
# CHECK: srst %r0, %r0
0xb2 0x5e 0x00 0x00
# CHECK: srst %r0, %r15
0xb2 0x5e 0x00 0x0f
# CHECK: srst %r15, %r0
0xb2 0x5e 0x00 0xf0
# CHECK: srst %r7, %r8
0xb2 0x5e 0x00 0x78
# CHECK: stc %r0, 0
0x42 0x00 0x00 0x00

View File

@ -6728,6 +6728,16 @@
srlg %r0,%r0,524287(%r1)
srlg %r0,%r0,524287(%r15)
#CHECK: srst %r0, %r0 # encoding: [0xb2,0x5e,0x00,0x00]
#CHECK: srst %r0, %r15 # encoding: [0xb2,0x5e,0x00,0x0f]
#CHECK: srst %r15, %r0 # encoding: [0xb2,0x5e,0x00,0xf0]
#CHECK: srst %r7, %r8 # encoding: [0xb2,0x5e,0x00,0x78]
srst %r0,%r0
srst %r0,%r15
srst %r15,%r0
srst %r7,%r8
#CHECK: st %r0, 0 # encoding: [0x50,0x00,0x00,0x00]
#CHECK: st %r0, 4095 # encoding: [0x50,0x00,0x0f,0xff]
#CHECK: st %r0, 0(%r1) # encoding: [0x50,0x00,0x10,0x00]