mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 21:00:29 +00:00
[SystemZ] Add basic prefetch support
Just the instructions and intrinsics for now. llvm-svn: 189100
This commit is contained in:
parent
152d2f09a8
commit
9867b44c59
@ -35,10 +35,6 @@ performance measurements.
|
||||
|
||||
--
|
||||
|
||||
We don't support prefetching yet.
|
||||
|
||||
--
|
||||
|
||||
There is no scheduling support.
|
||||
|
||||
--
|
||||
|
@ -57,6 +57,10 @@ namespace llvm {
|
||||
const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2;
|
||||
const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2;
|
||||
|
||||
// Mask assignments for PFD.
|
||||
const unsigned PFD_READ = 1;
|
||||
const unsigned PFD_WRITE = 2;
|
||||
|
||||
// Return true if Val fits an LLILL operand.
|
||||
static inline bool isImmLL(uint64_t Val) {
|
||||
return (Val & ~0x000000000000ffffULL) == 0;
|
||||
|
@ -194,6 +194,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
|
||||
setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
|
||||
setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
|
||||
|
||||
// Handle prefetches with PFD or PFDRL.
|
||||
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
|
||||
|
||||
// Handle floating-point types.
|
||||
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
|
||||
I <= MVT::LAST_FP_VALUETYPE;
|
||||
@ -1806,6 +1809,26 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
|
||||
SystemZ::R15D, Op.getOperand(1));
|
||||
}
|
||||
|
||||
// Lower an ISD::PREFETCH node.  Data prefetches become a
// SystemZISD::PREFETCH memory-intrinsic node carrying a PFD control code
// (PFD_READ or PFD_WRITE) and the prefetch address; any other prefetch is
// dropped and only its incoming chain is returned.
SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Address = Op.getOperand(1);

  // Operand 4 is nonzero for a data prefetch.  There is nothing to emit
  // for the other kind, so just preserve the chain.
  bool ForData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (!ForData)
    return Chain;

  // Operand 2 is nonzero for a write (store) prefetch.
  unsigned ControlCode = SystemZ::PFD_READ;
  if (cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue())
    ControlCode = SystemZ::PFD_WRITE;

  // Rebuild the node as a target prefetch, reusing the original node's
  // value types, memory type and memory operand.
  MemIntrinsicSDNode *MemNode = cast<MemIntrinsicSDNode>(Op.getNode());
  SDValue Ops[] = {
    Chain,
    DAG.getConstant(ControlCode, MVT::i32),
    Address
  };
  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op),
                                 MemNode->getVTList(), Ops,
                                 array_lengthof(Ops),
                                 MemNode->getMemoryVT(),
                                 MemNode->getMemOperand());
}
|
||||
|
||||
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
switch (Op.getOpcode()) {
|
||||
@ -1869,6 +1892,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
|
||||
return lowerSTACKSAVE(Op, DAG);
|
||||
case ISD::STACKRESTORE:
|
||||
return lowerSTACKRESTORE(Op, DAG);
|
||||
case ISD::PREFETCH:
|
||||
return lowerPREFETCH(Op, DAG);
|
||||
default:
|
||||
llvm_unreachable("Unexpected node to lower");
|
||||
}
|
||||
@ -1909,6 +1934,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
OPCODE(ATOMIC_LOADW_UMIN);
|
||||
OPCODE(ATOMIC_LOADW_UMAX);
|
||||
OPCODE(ATOMIC_CMP_SWAPW);
|
||||
OPCODE(PREFETCH);
|
||||
}
|
||||
return NULL;
|
||||
#undef OPCODE
|
||||
|
@ -132,7 +132,12 @@ namespace SystemZISD {
|
||||
// operand into the high bits
|
||||
// Operand 4: the negative of operand 2, for rotating the other way
|
||||
// Operand 5: the width of the field in bits (8 or 16)
|
||||
ATOMIC_CMP_SWAPW
|
||||
ATOMIC_CMP_SWAPW,
|
||||
|
||||
// Prefetch from the second operand using the 4-bit control code in
|
||||
// the first operand. The code is 1 for a load prefetch and 2 for
|
||||
// a store prefetch.
|
||||
PREFETCH
|
||||
};
|
||||
}
|
||||
|
||||
@ -225,6 +230,7 @@ private:
|
||||
SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
// If the last instruction before MBBI in MBB was some form of COMPARE,
|
||||
// try to replace it with a COMPARE AND BRANCH just before MBBI.
|
||||
|
@ -540,6 +540,10 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
|
||||
// One output operand and five input operands. The first two operands
|
||||
// are registers and the other three are immediates.
|
||||
//
|
||||
// Prefetch:
|
||||
// One 4-bit immediate operand and one address operand. The immediate
|
||||
// operand is 1 for a load prefetch and 2 for a store prefetch.
|
||||
//
|
||||
// The format determines which input operands are tied to output operands,
|
||||
// and also determines the shape of any address operand.
|
||||
//
|
||||
@ -1304,6 +1308,22 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
|
||||
let DisableEncoding = "$R1src";
|
||||
}
|
||||
|
||||
// A prefetch instruction in RXY format.  There are no outputs; the inputs
// are the 4-bit control code ($R1) and a bdxaddr20only address ($XBD2).
// The selection pattern applies "operator" to those same two operands.
class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
|
||||
: InstRXY<opcode, (outs), (ins uimm8zx4:$R1, bdxaddr20only:$XBD2),
|
||||
mnemonic##"\t$R1, $XBD2",
|
||||
[(operator uimm8zx4:$R1, bdxaddr20only:$XBD2)]>;
|
||||
|
||||
// A prefetch instruction in RIL format that takes a PC-relative address.
// There are no outputs; the inputs are the 4-bit control code ($R1) and a
// pcrel32 operand ($I2).  The selection pattern applies "operator" to those
// same two operands.
class PrefetchRILPC<string mnemonic, bits<12> opcode,
|
||||
SDPatternOperator operator>
|
||||
: InstRIL<opcode, (outs), (ins uimm8zx4:$R1, pcrel32:$I2),
|
||||
mnemonic##"\t$R1, $I2",
|
||||
[(operator uimm8zx4:$R1, pcrel32:$I2)]> {
|
||||
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
|
||||
// However, BDXs have two extra operands and are therefore 6 units more
|
||||
// complex.
|
||||
let AddedComplexity = 7;
|
||||
}
|
||||
|
||||
// A floating-point load-and-test operation. Create both a normal unary
|
||||
// operation and one that acts as a comparison against zero.
|
||||
multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
|
||||
|
@ -1034,6 +1034,13 @@ let mayLoad = 1, Defs = [CC] in
|
||||
let mayLoad = 1, Defs = [CC], Uses = [R0W] in
|
||||
defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Prefetch
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Both instructions are selected from z_prefetch.  PFD takes a
// base + index + displacement address (PrefetchRXY); PFDRL takes a
// PC-relative address (PrefetchRILPC).
def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>;
|
||||
def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Atomic operations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -64,6 +64,9 @@ def SDT_ZString : SDTypeProfile<1, 3,
|
||||
SDTCisPtrTy<2>,
|
||||
SDTCisVT<3, i32>]>;
|
||||
def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
|
||||
// Type profile used by z_prefetch: no results and two operands, an i8
// control code followed by a pointer-typed address.
// NOTE(review): lowerPREFETCH creates the control-code constant as MVT::i32,
// whereas this profile constrains operand 0 to i8 -- confirm the two agree.
def SDT_ZPrefetch : SDTypeProfile<0, 2,
|
||||
[SDTCisVT<0, i8>,
|
||||
SDTCisPtrTy<1>]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Node definitions
|
||||
@ -130,6 +133,9 @@ def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
|
||||
def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic,
|
||||
[SDNPInGlue]>;
|
||||
// Selection DAG node for SystemZISD::PREFETCH, typed by SDT_ZPrefetch.
// The node is chained, carries a memory operand, and is flagged as
// possibly loading and possibly storing.
def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
|
||||
SDNPMemOperand]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Pattern fragments
|
||||
|
87
test/CodeGen/SystemZ/prefetch-01.ll
Normal file
87
test/CodeGen/SystemZ/prefetch-01.ll
Normal file
@ -0,0 +1,87 @@
|
||||
; Test data prefetching.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
||||
|
||||
declare void @llvm.prefetch(i8*, i32, i32, i32)
|
||||
|
||||
@g = global [4096 x i8] zeroinitializer
|
||||
|
||||
; Check that instruction read prefetches are ignored.
|
||||
define void @f1(i8 *%ptr) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK-NOT: %r2
|
||||
; CHECK: br %r14
|
||||
call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that instruction write prefetches are ignored.
|
||||
define void @f2(i8 *%ptr) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK-NOT: %r2
|
||||
; CHECK: br %r14
|
||||
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check data read prefetches.
|
||||
define void @f3(i8 *%ptr) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: pfd 1, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check data write prefetches.
|
||||
define void @f4(i8 *%ptr) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: pfd 2, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check an address at the negative end of the range.
|
||||
define void @f5(i8 *%base, i64 %index) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: pfd 2, -524288({{%r2,%r3|%r3,%r2}})
|
||||
; CHECK: br %r14
|
||||
%add = add i64 %index, -524288
|
||||
%ptr = getelementptr i8 *%base, i64 %add
|
||||
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check an address at the positive end of the range.
|
||||
define void @f6(i8 *%base, i64 %index) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: pfd 2, 524287({{%r2,%r3|%r3,%r2}})
|
||||
; CHECK: br %r14
|
||||
%add = add i64 %index, 524287
|
||||
%ptr = getelementptr i8 *%base, i64 %add
|
||||
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that the next address up still compiles.
|
||||
define void @f7(i8 *%base, i64 %index) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: 524288
|
||||
; CHECK: pfd 2,
|
||||
; CHECK: br %r14
|
||||
%add = add i64 %index, 524288
|
||||
%ptr = getelementptr i8 *%base, i64 %add
|
||||
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check pc-relative prefetches.
|
||||
define void @f8() {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: pfdrl 2, g
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr [4096 x i8] *@g, i64 0, i64 0
|
||||
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
|
||||
ret void
|
||||
}
|
@ -1330,3 +1330,35 @@
|
||||
# 0x0000077c:
|
||||
# CHECK: brctg %r15, 0x1077a
|
||||
0xa7 0xf7 0x7f 0xff
|
||||
|
||||
# 0x00000780:
|
||||
# CHECK: pfdrl 0, 0x780
|
||||
0xc6 0x02 0x00 0x00 0x00 0x00
|
||||
|
||||
# 0x00000786:
|
||||
# CHECK: pfdrl 15, 0x786
|
||||
0xc6 0xf2 0x00 0x00 0x00 0x00
|
||||
|
||||
# 0x0000078c:
|
||||
# CHECK: pfdrl 0, 0x78a
|
||||
0xc6 0x02 0xff 0xff 0xff 0xff
|
||||
|
||||
# 0x00000792:
|
||||
# CHECK: pfdrl 15, 0x790
|
||||
0xc6 0xf2 0xff 0xff 0xff 0xff
|
||||
|
||||
# 0x00000798:
|
||||
# CHECK: pfdrl 0, 0xffffffff00000798
|
||||
0xc6 0x02 0x80 0x00 0x00 0x00
|
||||
|
||||
# 0x0000079e:
|
||||
# CHECK: pfdrl 15, 0xffffffff0000079e
|
||||
0xc6 0xf2 0x80 0x00 0x00 0x00
|
||||
|
||||
# 0x000007a4:
|
||||
# CHECK: pfdrl 0, 0x1000007a2
|
||||
0xc6 0x02 0x7f 0xff 0xff 0xff
|
||||
|
||||
# 0x000007aa:
|
||||
# CHECK: pfdrl 15, 0x1000007a8
|
||||
0xc6 0xf2 0x7f 0xff 0xff 0xff
|
||||
|
@ -5329,6 +5329,36 @@
|
||||
# CHECK: oy %r15, 0
|
||||
0xe3 0xf0 0x00 0x00 0x00 0x56
|
||||
|
||||
# CHECK: pfd 0, -524288
|
||||
0xe3 0x00 0x00 0x00 0x80 0x36
|
||||
|
||||
# CHECK: pfd 0, -1
|
||||
0xe3 0x00 0x0f 0xff 0xff 0x36
|
||||
|
||||
# CHECK: pfd 0, 0
|
||||
0xe3 0x00 0x00 0x00 0x00 0x36
|
||||
|
||||
# CHECK: pfd 0, 1
|
||||
0xe3 0x00 0x00 0x01 0x00 0x36
|
||||
|
||||
# CHECK: pfd 0, 524287
|
||||
0xe3 0x00 0x0f 0xff 0x7f 0x36
|
||||
|
||||
# CHECK: pfd 0, 0(%r1)
|
||||
0xe3 0x00 0x10 0x00 0x00 0x36
|
||||
|
||||
# CHECK: pfd 0, 0(%r15)
|
||||
0xe3 0x00 0xf0 0x00 0x00 0x36
|
||||
|
||||
# CHECK: pfd 0, 524287(%r1,%r15)
|
||||
0xe3 0x01 0xff 0xff 0x7f 0x36
|
||||
|
||||
# CHECK: pfd 0, 524287(%r15,%r1)
|
||||
0xe3 0x0f 0x1f 0xff 0x7f 0x36
|
||||
|
||||
# CHECK: pfd 15, 0
|
||||
0xe3 0xf0 0x00 0x00 0x00 0x36
|
||||
|
||||
# CHECK: risbg %r0, %r0, 0, 0, 0
|
||||
0xec 0x00 0x00 0x00 0x00 0x55
|
||||
|
||||
|
@ -2275,6 +2275,40 @@
|
||||
oy %r0, -524289
|
||||
oy %r0, 524288
|
||||
|
||||
#CHECK: error: invalid operand
|
||||
#CHECK: pfd -1, 0
|
||||
#CHECK: error: invalid operand
|
||||
#CHECK: pfd 16, 0
|
||||
#CHECK: error: invalid operand
|
||||
#CHECK: pfd 1, -524289
|
||||
#CHECK: error: invalid operand
|
||||
#CHECK: pfd 1, 524288
|
||||
|
||||
pfd -1, 0
|
||||
pfd 16, 0
|
||||
pfd 1, -524289
|
||||
pfd 1, 524288
|
||||
|
||||
#CHECK: error: invalid operand
|
||||
#CHECK: pfdrl -1, 0
|
||||
#CHECK: error: invalid operand
|
||||
#CHECK: pfdrl 16, 0
|
||||
#CHECK: error: offset out of range
|
||||
#CHECK: pfdrl 1, -0x1000000002
|
||||
#CHECK: error: offset out of range
|
||||
#CHECK: pfdrl 1, -1
|
||||
#CHECK: error: offset out of range
|
||||
#CHECK: pfdrl 1, 1
|
||||
#CHECK: error: offset out of range
|
||||
#CHECK: pfdrl 1, 0x100000000
|
||||
|
||||
pfdrl -1, 0
|
||||
pfdrl 16, 0
|
||||
pfdrl 1, -0x1000000002
|
||||
pfdrl 1, -1
|
||||
pfdrl 1, 1
|
||||
pfdrl 1, 0x100000000
|
||||
|
||||
#CHECK: error: invalid operand
|
||||
#CHECK: risbg %r0,%r0,0,0,-1
|
||||
#CHECK: error: invalid operand
|
||||
|
@ -6106,6 +6106,65 @@
|
||||
oy %r0, 524287(%r15,%r1)
|
||||
oy %r15, 0
|
||||
|
||||
#CHECK: pfd 0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x36]
|
||||
#CHECK: pfd 0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x36]
|
||||
#CHECK: pfd 0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x36]
|
||||
#CHECK: pfd 0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x36]
|
||||
#CHECK: pfd 0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x36]
|
||||
#CHECK: pfd 0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x36]
|
||||
#CHECK: pfd 0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x36]
|
||||
#CHECK: pfd 0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x36]
|
||||
#CHECK: pfd 0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x36]
|
||||
#CHECK: pfd 15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x36]
|
||||
|
||||
pfd 0, -524288
|
||||
pfd 0, -1
|
||||
pfd 0, 0
|
||||
pfd 0, 1
|
||||
pfd 0, 524287
|
||||
pfd 0, 0(%r1)
|
||||
pfd 0, 0(%r15)
|
||||
pfd 0, 524287(%r1,%r15)
|
||||
pfd 0, 524287(%r15,%r1)
|
||||
pfd 15, 0
|
||||
|
||||
#CHECK: pfdrl 0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x02,A,A,A,A]
|
||||
#CHECK: fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
|
||||
pfdrl 0, -0x100000000
|
||||
#CHECK: pfdrl 0, .[[LAB:L.*]]-2 # encoding: [0xc6,0x02,A,A,A,A]
|
||||
#CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
|
||||
pfdrl 0, -2
|
||||
#CHECK: pfdrl 0, .[[LAB:L.*]] # encoding: [0xc6,0x02,A,A,A,A]
|
||||
#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
|
||||
pfdrl 0, 0
|
||||
#CHECK: pfdrl 0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x02,A,A,A,A]
|
||||
#CHECK: fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
|
||||
pfdrl 0, 0xfffffffe
|
||||
|
||||
#CHECK: pfdrl 0, foo # encoding: [0xc6,0x02,A,A,A,A]
|
||||
# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
|
||||
#CHECK: pfdrl 15, foo # encoding: [0xc6,0xf2,A,A,A,A]
|
||||
# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
|
||||
|
||||
pfdrl 0, foo
|
||||
pfdrl 15, foo
|
||||
|
||||
#CHECK: pfdrl 3, bar+100 # encoding: [0xc6,0x32,A,A,A,A]
|
||||
# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
|
||||
#CHECK: pfdrl 4, bar+100 # encoding: [0xc6,0x42,A,A,A,A]
|
||||
# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
|
||||
|
||||
pfdrl 3, bar+100
|
||||
pfdrl 4, bar+100
|
||||
|
||||
#CHECK: pfdrl 7, frob@PLT # encoding: [0xc6,0x72,A,A,A,A]
|
||||
# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
|
||||
#CHECK: pfdrl 8, frob@PLT # encoding: [0xc6,0x82,A,A,A,A]
|
||||
# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
|
||||
|
||||
pfdrl 7, frob@PLT
|
||||
pfdrl 8, frob@PLT
|
||||
|
||||
#CHECK: risbg %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x55]
|
||||
#CHECK: risbg %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x55]
|
||||
#CHECK: risbg %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x55]
|
||||
|
Loading…
Reference in New Issue
Block a user