[SystemZ] Add basic prefetch support

Just the instructions and intrinsics for now.

llvm-svn: 189100
This commit is contained in:
Richard Sandiford 2013-08-23 11:36:42 +00:00
parent 152d2f09a8
commit 9867b44c59
12 changed files with 312 additions and 5 deletions

View File

@ -35,10 +35,6 @@ performance measurements.
--
We don't support prefetching yet.
--
There is no scheduling support.
--

View File

@ -57,6 +57,10 @@ namespace llvm {
const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2;
const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2;
// Mask assignments for PFD: the access code passed as its first operand.
// PFD_READ requests a load prefetch and PFD_WRITE a store prefetch.
const unsigned PFD_READ = 1;
const unsigned PFD_WRITE = 2;
// Return true if Val fits an LLILL operand.
static inline bool isImmLL(uint64_t Val) {
return (Val & ~0x000000000000ffffULL) == 0;

View File

@ -194,6 +194,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
// Handle prefetches with PFD or PFDRL.
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
// Handle floating-point types.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
I <= MVT::LAST_FP_VALUETYPE;
@ -1806,6 +1809,26 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SystemZ::R15D, Op.getOperand(1));
}
// Custom lowering for ISD::PREFETCH.  A request whose operand 4 is zero is
// not a data prefetch and is dropped, leaving only the incoming chain.
// Anything else becomes a SystemZISD::PREFETCH memory-intrinsic node whose
// first non-chain operand is the PFD access code.
SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  // Operand 4 distinguishes data prefetches (nonzero) from the rest.
  const uint64_t DataFlag =
    cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (DataFlag == 0) {
    // Nothing to emit: just preserve the chain.
    return Op.getOperand(0);
  }

  // Operand 2 is nonzero for a write prefetch and zero for a read prefetch.
  const uint64_t WriteFlag =
    cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Code = SystemZ::PFD_READ;
  if (WriteFlag != 0)
    Code = SystemZ::PFD_WRITE;

  // Reuse the original node's value types and memory information so that
  // the target node describes the same access.
  MemIntrinsicSDNode *MemNode = cast<MemIntrinsicSDNode>(Op.getNode());
  SDValue Chain = Op.getOperand(0);
  SDValue CodeOp = DAG.getConstant(Code, MVT::i32);
  SDValue Address = Op.getOperand(1);
  SDValue Ops[] = { Chain, CodeOp, Address };
  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op),
                                 MemNode->getVTList(), Ops,
                                 array_lengthof(Ops),
                                 MemNode->getMemoryVT(),
                                 MemNode->getMemOperand());
}
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@ -1869,6 +1892,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSTACKSAVE(Op, DAG);
case ISD::STACKRESTORE:
return lowerSTACKRESTORE(Op, DAG);
case ISD::PREFETCH:
return lowerPREFETCH(Op, DAG);
default:
llvm_unreachable("Unexpected node to lower");
}
@ -1909,6 +1934,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_LOADW_UMIN);
OPCODE(ATOMIC_LOADW_UMAX);
OPCODE(ATOMIC_CMP_SWAPW);
OPCODE(PREFETCH);
}
return NULL;
#undef OPCODE

View File

@ -132,7 +132,12 @@ namespace SystemZISD {
// operand into the high bits
// Operand 4: the negative of operand 2, for rotating the other way
// Operand 5: the width of the field in bits (8 or 16)
ATOMIC_CMP_SWAPW
ATOMIC_CMP_SWAPW,
// Prefetch from the second operand using the 4-bit control code in
// the first operand. The code is 1 for a load prefetch and 2 for
// a store prefetch.
PREFETCH
};
}
@ -225,6 +230,7 @@ private:
SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.

View File

@ -540,6 +540,10 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// One output operand and five input operands. The first two operands
// are registers and the other three are immediates.
//
// Prefetch:
// One 4-bit immediate operand and one address operand. The immediate
// operand is 1 for a load prefetch and 2 for a store prefetch.
//
// The format determines which input operands are tied to output operands,
// and also determines the shape of any address operand.
//
@ -1304,6 +1308,22 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let DisableEncoding = "$R1src";
}
// A prefetch in RXY format.  There are no outputs; the inputs are a 4-bit
// immediate access code ($R1) and a base + index + displacement address
// ($XBD2).  The selection pattern applies "operator" to those two operands.
class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
: InstRXY<opcode, (outs), (ins uimm8zx4:$R1, bdxaddr20only:$XBD2),
mnemonic##"\t$R1, $XBD2",
[(operator uimm8zx4:$R1, bdxaddr20only:$XBD2)]>;
// A prefetch in RIL format whose address is PC-relative.  There are no
// outputs; the inputs are a 4-bit immediate access code ($R1) and a pcrel32
// target ($I2).  The selection pattern applies "operator" to those two
// operands.
class PrefetchRILPC<string mnemonic, bits<12> opcode,
SDPatternOperator operator>
: InstRIL<opcode, (outs), (ins uimm8zx4:$R1, pcrel32:$I2),
mnemonic##"\t$R1, $I2",
[(operator uimm8zx4:$R1, pcrel32:$I2)]> {
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
// complex.
let AddedComplexity = 7;
}
// A floating-point load-and-test operation. Create both a normal unary
// operation and one that acts as a comparison against zero.
multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,

View File

@ -1034,6 +1034,13 @@ let mayLoad = 1, Defs = [CC] in
let mayLoad = 1, Defs = [CC], Uses = [R0W] in
defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>;
//===----------------------------------------------------------------------===//
// Prefetch
//===----------------------------------------------------------------------===//
// Both instructions are selected from z_prefetch.  PFD takes a base + index
// + displacement address; PFDRL takes a PC-relative address.
def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>;
def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
//===----------------------------------------------------------------------===//
// Atomic operations
//===----------------------------------------------------------------------===//

View File

@ -64,6 +64,9 @@ def SDT_ZString : SDTypeProfile<1, 3,
SDTCisPtrTy<2>,
SDTCisVT<3, i32>]>;
def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
// Type profile for prefetch nodes: no results and two operands, an i8
// access code followed by a pointer-typed address.
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i8>,
SDTCisPtrTy<1>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@ -130,6 +133,9 @@ def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic,
[SDNPInGlue]>;
// Prefetch node.  It is chained, is marked as possibly loading and possibly
// storing, and carries a memory operand.
def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// Pattern fragments

View File

@ -0,0 +1,87 @@
; Test data prefetching.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare void @llvm.prefetch(i8*, i32, i32, i32)
@g = global [4096 x i8] zeroinitializer
; Check that instruction read prefetches are ignored.
define void @f1(i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK: br %r14
call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 0)
ret void
}
; Check that instruction write prefetches are ignored.
define void @f2(i8 *%ptr) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK: br %r14
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 0)
ret void
}
; Check data read prefetches.
define void @f3(i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: pfd 1, 0(%r2)
; CHECK: br %r14
call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 1)
ret void
}
; Check data write prefetches.
define void @f4(i8 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: pfd 2, 0(%r2)
; CHECK: br %r14
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
ret void
}
; Check an address at the negative end of the range.
define void @f5(i8 *%base, i64 %index) {
; CHECK-LABEL: f5:
; CHECK: pfd 2, -524288({{%r2,%r3|%r3,%r2}})
; CHECK: br %r14
%add = add i64 %index, -524288
%ptr = getelementptr i8 *%base, i64 %add
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
ret void
}
; Check an address at the positive end of the range.
define void @f6(i8 *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: pfd 2, 524287({{%r2,%r3|%r3,%r2}})
; CHECK: br %r14
%add = add i64 %index, 524287
%ptr = getelementptr i8 *%base, i64 %add
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
ret void
}
; Check that the next address up still compiles.
define void @f7(i8 *%base, i64 %index) {
; CHECK-LABEL: f7:
; CHECK: 524288
; CHECK: pfd 2,
; CHECK: br %r14
%add = add i64 %index, 524288
%ptr = getelementptr i8 *%base, i64 %add
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
ret void
}
; Check pc-relative prefetches.
define void @f8() {
; CHECK-LABEL: f8:
; CHECK: pfdrl 2, g
; CHECK: br %r14
%ptr = getelementptr [4096 x i8] *@g, i64 0, i64 0
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
ret void
}

View File

@ -1330,3 +1330,35 @@
# 0x0000077c:
# CHECK: brctg %r15, 0x1077a
0xa7 0xf7 0x7f 0xff
# 0x00000780:
# CHECK: pfdrl 0, 0x780
0xc6 0x02 0x00 0x00 0x00 0x00
# 0x00000786:
# CHECK: pfdrl 15, 0x786
0xc6 0xf2 0x00 0x00 0x00 0x00
# 0x0000078c:
# CHECK: pfdrl 0, 0x78a
0xc6 0x02 0xff 0xff 0xff 0xff
# 0x00000792:
# CHECK: pfdrl 15, 0x790
0xc6 0xf2 0xff 0xff 0xff 0xff
# 0x00000798:
# CHECK: pfdrl 0, 0xffffffff00000798
0xc6 0x02 0x80 0x00 0x00 0x00
# 0x0000079e:
# CHECK: pfdrl 15, 0xffffffff0000079e
0xc6 0xf2 0x80 0x00 0x00 0x00
# 0x000007a4:
# CHECK: pfdrl 0, 0x1000007a2
0xc6 0x02 0x7f 0xff 0xff 0xff
# 0x000007aa:
# CHECK: pfdrl 15, 0x1000007a8
0xc6 0xf2 0x7f 0xff 0xff 0xff

View File

@ -5329,6 +5329,36 @@
# CHECK: oy %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x56
# CHECK: pfd 0, -524288
0xe3 0x00 0x00 0x00 0x80 0x36
# CHECK: pfd 0, -1
0xe3 0x00 0x0f 0xff 0xff 0x36
# CHECK: pfd 0, 0
0xe3 0x00 0x00 0x00 0x00 0x36
# CHECK: pfd 0, 1
0xe3 0x00 0x00 0x01 0x00 0x36
# CHECK: pfd 0, 524287
0xe3 0x00 0x0f 0xff 0x7f 0x36
# CHECK: pfd 0, 0(%r1)
0xe3 0x00 0x10 0x00 0x00 0x36
# CHECK: pfd 0, 0(%r15)
0xe3 0x00 0xf0 0x00 0x00 0x36
# CHECK: pfd 0, 524287(%r1,%r15)
0xe3 0x01 0xff 0xff 0x7f 0x36
# CHECK: pfd 0, 524287(%r15,%r1)
0xe3 0x0f 0x1f 0xff 0x7f 0x36
# CHECK: pfd 15, 0
0xe3 0xf0 0x00 0x00 0x00 0x36
# CHECK: risbg %r0, %r0, 0, 0, 0
0xec 0x00 0x00 0x00 0x00 0x55

View File

@ -2275,6 +2275,40 @@
oy %r0, -524289
oy %r0, 524288
#CHECK: error: invalid operand
#CHECK: pfd -1, 0
#CHECK: error: invalid operand
#CHECK: pfd 16, 0
#CHECK: error: invalid operand
#CHECK: pfd 1, -524289
#CHECK: error: invalid operand
#CHECK: pfd 1, 524288
pfd -1, 0
pfd 16, 0
pfd 1, -524289
pfd 1, 524288
#CHECK: error: invalid operand
#CHECK: pfdrl -1, 0
#CHECK: error: invalid operand
#CHECK: pfdrl 16, 0
#CHECK: error: offset out of range
#CHECK: pfdrl 1, -0x1000000002
#CHECK: error: offset out of range
#CHECK: pfdrl 1, -1
#CHECK: error: offset out of range
#CHECK: pfdrl 1, 1
#CHECK: error: offset out of range
#CHECK: pfdrl 1, 0x100000000
pfdrl -1, 0
pfdrl 16, 0
pfdrl 1, -0x1000000002
pfdrl 1, -1
pfdrl 1, 1
pfdrl 1, 0x100000000
#CHECK: error: invalid operand
#CHECK: risbg %r0,%r0,0,0,-1
#CHECK: error: invalid operand

View File

@ -6106,6 +6106,65 @@
oy %r0, 524287(%r15,%r1)
oy %r15, 0
#CHECK: pfd 0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x36]
#CHECK: pfd 0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x36]
#CHECK: pfd 0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x36]
#CHECK: pfd 0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x36]
#CHECK: pfd 0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x36]
#CHECK: pfd 0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x36]
#CHECK: pfd 0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x36]
#CHECK: pfd 0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x36]
#CHECK: pfd 0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x36]
#CHECK: pfd 15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x36]
pfd 0, -524288
pfd 0, -1
pfd 0, 0
pfd 0, 1
pfd 0, 524287
pfd 0, 0(%r1)
pfd 0, 0(%r15)
pfd 0, 524287(%r1,%r15)
pfd 0, 524287(%r15,%r1)
pfd 15, 0
#CHECK: pfdrl 0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x02,A,A,A,A]
#CHECK: fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
pfdrl 0, -0x100000000
#CHECK: pfdrl 0, .[[LAB:L.*]]-2 # encoding: [0xc6,0x02,A,A,A,A]
#CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
pfdrl 0, -2
#CHECK: pfdrl 0, .[[LAB:L.*]] # encoding: [0xc6,0x02,A,A,A,A]
#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
pfdrl 0, 0
#CHECK: pfdrl 0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x02,A,A,A,A]
#CHECK: fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
pfdrl 0, 0xfffffffe
#CHECK: pfdrl 0, foo # encoding: [0xc6,0x02,A,A,A,A]
# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
#CHECK: pfdrl 15, foo # encoding: [0xc6,0xf2,A,A,A,A]
# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
pfdrl 0, foo
pfdrl 15, foo
#CHECK: pfdrl 3, bar+100 # encoding: [0xc6,0x32,A,A,A,A]
# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
#CHECK: pfdrl 4, bar+100 # encoding: [0xc6,0x42,A,A,A,A]
# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
pfdrl 3, bar+100
pfdrl 4, bar+100
#CHECK: pfdrl 7, frob@PLT # encoding: [0xc6,0x72,A,A,A,A]
# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
#CHECK: pfdrl 8, frob@PLT # encoding: [0xc6,0x82,A,A,A,A]
# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
pfdrl 7, frob@PLT
pfdrl 8, frob@PLT
#CHECK: risbg %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x55]
#CHECK: risbg %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x55]
#CHECK: risbg %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x55]