[SystemZ] Add TM and TMY

The main complication here is that TM and TMY (the memory forms) set
CC differently from the register forms.  When the tested bits contain
some 0s and some 1s, the register forms set CC to 1 or 2 based on the
value of the uppermost bit.  The memory forms instead set CC to 1
regardless of the uppermost bit.

Until now, I've tried to make it so that a branch never tests for an
impossible CC value.  E.g. NR only sets CC to 0 or 1, so branches on the
result will only test for 0 or 1.  Originally I'd tried to do the same
thing for TM and TMY by using custom matching code in ISelDAGToDAG.
That ended up being very ugly though, and would have meant duplicating
some of the chain checks that the common isel code does.

I've therefore gone for the simpler alternative of adding an extra
operand to the TM DAG opcode to say whether a memory form would be OK.
This means that the inverse of a "TM;JE" is "TM;JNE" rather than the
more precise "TM;JNLE", just like the inverse of "TMLL;JE" is "TMLL;JNE".
I suppose that's arguably less confusing though...


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190400 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Richard Sandiford 2013-09-10 10:20:32 +00:00
parent 436f64567c
commit 299fdd814f
8 changed files with 393 additions and 14 deletions

View File

@ -1226,13 +1226,13 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
return 0;
}
// See whether the comparison (Opcode CmpOp0, CmpOp1) can be implemented
// as a TEST UNDER MASK instruction when the condition being tested is
// as described by CCValid and CCMask. Update the arguments with the
// TM version if so.
// See whether the comparison (Opcode CmpOp0, CmpOp1, ICmpType) can be
// implemented as a TEST UNDER MASK instruction when the condition being
// tested is as described by CCValid and CCMask. Update the arguments
// with the TM version if so.
static void adjustForTestUnderMask(unsigned &Opcode, SDValue &CmpOp0,
SDValue &CmpOp1, unsigned &CCValid,
unsigned &CCMask, unsigned ICmpType) {
unsigned &CCMask, unsigned &ICmpType) {
// Check that we have a comparison with a constant.
ConstantSDNode *ConstCmpOp1 = dyn_cast<ConstantSDNode>(CmpOp1);
if (!ConstCmpOp1)
@ -1266,6 +1266,8 @@ static void adjustForTestUnderMask(unsigned &Opcode, SDValue &CmpOp0,
Opcode = SystemZISD::TM;
CmpOp0 = AndOp0;
CmpOp1 = AndOp1;
ICmpType = (bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
CCValid = SystemZ::CCMASK_TM;
CCMask = NewCCMask;
}
@ -1315,7 +1317,7 @@ static SDValue emitCmp(const SystemZTargetMachine &TM, SelectionDAG &DAG,
}
adjustForTestUnderMask(Opcode, CmpOp0, CmpOp1, CCValid, CCMask, ICmpType);
if (Opcode == SystemZISD::ICMP)
if (Opcode == SystemZISD::ICMP || Opcode == SystemZISD::TM)
return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1,
DAG.getConstant(ICmpType, MVT::i32));
return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1);

View File

@ -46,7 +46,10 @@ namespace SystemZISD {
FCMP,
// Test under mask. The first operand is ANDed with the second operand
// and the condition codes are set on the result.
// and the condition codes are set on the result. The third operand is
// a boolean that is true if the condition codes need to distinguish
// between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the
// register forms do but the memory forms don't).
TM,
// Branches if a condition is true. Operand 0 is the chain operand;

View File

@ -1037,14 +1037,16 @@ let mayLoad = 1, Defs = [CC], Uses = [R0W] in
// Test under mask.
let Defs = [CC] in {
let isCodeGenOnly = 1 in {
def TMLL32 : CompareRI<"tmll", 0xA71, z_tm, GR32, imm32ll16>;
def TMLH32 : CompareRI<"tmlh", 0xA70, z_tm, GR32, imm32lh16>;
def TMLL32 : CompareRI<"tmll", 0xA71, z_tm_reg, GR32, imm32ll16>;
def TMLH32 : CompareRI<"tmlh", 0xA70, z_tm_reg, GR32, imm32lh16>;
}
def TMLL : CompareRI<"tmll", 0xA71, z_tm, GR64, imm64ll16>;
def TMLH : CompareRI<"tmlh", 0xA70, z_tm, GR64, imm64lh16>;
def TMHL : CompareRI<"tmhl", 0xA73, z_tm, GR64, imm64hl16>;
def TMHH : CompareRI<"tmhh", 0xA72, z_tm, GR64, imm64hh16>;
def TMLL : CompareRI<"tmll", 0xA71, z_tm_reg, GR64, imm64ll16>;
def TMLH : CompareRI<"tmlh", 0xA70, z_tm_reg, GR64, imm64lh16>;
def TMHL : CompareRI<"tmhl", 0xA73, z_tm_reg, GR64, imm64hl16>;
def TMHH : CompareRI<"tmhh", 0xA72, z_tm_reg, GR64, imm64hh16>;
defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>;
}
//===----------------------------------------------------------------------===//

View File

@ -99,7 +99,7 @@ def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall,
def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>;
def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>;
def z_tm : SDNode<"SystemZISD::TM", SDT_ZCmp, [SDNPOutGlue]>;
def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp, [SDNPOutGlue]>;
def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
[SDNPHasChain, SDNPInGlue]>;
def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
@ -176,6 +176,10 @@ def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
return Type != SystemZICMP::SignedOnly;
}]>;
// Register- and memory-based TEST UNDER MASK.
// Both fragments match a z_tm node and differ only in the third operand,
// which is nonzero when the condition codes must distinguish
// CCMASK_TM_MIXED_MSB_0 from CCMASK_TM_MIXED_MSB_1.
// z_tm_reg accepts any constant there; z_tm_mem only matches when the
// operand is 0, i.e. when that distinction is not needed.
def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>;
def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
// Register sign-extend operations. Sub-32-bit values are represented as i32s.
def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;

View File

@ -0,0 +1,245 @@
; Test the use of TM and TMY.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
@g = global i32 0
; Check a simple branching use of TM.
; The loaded byte is only used by an AND with 1 whose result is compared
; with zero, so the load is expected to fold into a TM on 0(%r2), followed
; by a JE.
define void @f1(i8 *%src) {
; CHECK-LABEL: f1:
; CHECK: tm 0(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
entry:
%byte = load i8 *%src
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
br i1 %cmp, label %exit, label %store
store:
store i32 1, i32 *@g
br label %exit
exit:
ret void
}
; Check that we do not fold across an aliasing store.
; The byte at %src is overwritten between the load and the AND, so the load
; is expected to stay a separate LLC ahead of the MVI, with the test done
; on the register by TMLL.
define void @f2(i8 *%src) {
; CHECK-LABEL: f2:
; CHECK: llc [[REG:%r[0-5]]], 0(%r2)
; CHECK: mvi 0(%r2), 0
; CHECK: tmll [[REG]], 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
entry:
%byte = load i8 *%src
; This store to the loaded address is what blocks the fold.
store i8 0, i8 *%src
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
br i1 %cmp, label %exit, label %store
store:
store i32 1, i32 *@g
br label %exit
exit:
ret void
}
; Check a simple select-based use of TM.
; Same mask test as f1, but the comparison result drives a select between
; %b and %a instead of a branch in the IR. The expected code is still a TM
; on 0(%r2) followed by a JE.
define double @f3(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f3:
; CHECK: tm 0(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
%byte = load i8 *%src
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
ret double %res
}
; Check that a later store to the same address does not stop the load from
; being folded into TM. Here the store follows the comparison, so the
; expected code tests memory directly with TM and only then emits the MVI.
define double @f4(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f4:
; CHECK: tm 0(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: mvi 0(%r2), 0
; CHECK: br %r14
%byte = load i8 *%src
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
; The store comes after every use of the loaded byte.
store i8 0, i8 *%src
ret double %res
}
; Check an inequality check.
; The masked byte is compared "ne" with zero, so the TM on 0(%r2) is
; expected to be followed by a JNE rather than a JE.
define double @f5(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f5:
; CHECK: tm 0(%r2), 1
; CHECK: jne {{\.L.*}}
; CHECK: br %r14
%byte = load i8 *%src
%and = and i8 %byte, 1
%cmp = icmp ne i8 %and, 0
%res = select i1 %cmp, double %b, double %a
ret double %res
}
; Check that we can also use TM for equality comparisons with the mask.
; The byte is ANDed with 254 and the result compared "eq" with 254 itself;
; the expected code is a TM with mask 254 followed by a JO.
define double @f6(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f6:
; CHECK: tm 0(%r2), 254
; CHECK: jo {{\.L.*}}
; CHECK: br %r14
%byte = load i8 *%src
%and = and i8 %byte, 254
%cmp = icmp eq i8 %and, 254
%res = select i1 %cmp, double %b, double %a
ret double %res
}
; Check inequality comparisons with the mask.
; Counterpart of f6: the masked value is compared "ne" with the mask 254,
; so the TM is expected to be followed by a JNO.
define double @f7(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f7:
; CHECK: tm 0(%r2), 254
; CHECK: jno {{\.L.*}}
; CHECK: br %r14
%byte = load i8 *%src
%and = and i8 %byte, 254
%cmp = icmp ne i8 %and, 254
%res = select i1 %cmp, double %b, double %a
ret double %res
}
; Check that we do not use the memory TM instruction when CC is being tested
; for 2.
; With mask 3 and comparison value 2, the upper masked bit is set and the
; lower one is clear. The memory forms set CC to 1 for every mixed result,
; so the byte is expected to be loaded with LLC and tested with the
; register form TMLL, followed by a JH.
define double @f8(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f8:
; CHECK: llc [[REG:%r[0-5]]], 0(%r2)
; CHECK: tmll [[REG]], 3
; CHECK: jh {{\.L.*}}
; CHECK: br %r14
%byte = load i8 *%src
%and = and i8 %byte, 3
%cmp = icmp eq i8 %and, 2
%res = select i1 %cmp, double %b, double %a
ret double %res
}
; Likewise check that we do not use the memory TM instruction when CC is
; being tested for 1.
; With mask 3 and comparison value 1, only the lower masked bit is set, so
; the result is mixed and the two mixed cases must be told apart. The
; expected code is again LLC plus TMLL, this time followed by a JL.
define double @f9(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f9:
; CHECK: llc [[REG:%r[0-5]]], 0(%r2)
; CHECK: tmll [[REG]], 3
; CHECK: jl {{\.L.*}}
; CHECK: br %r14
%byte = load i8 *%src
%and = and i8 %byte, 3
%cmp = icmp eq i8 %and, 1
%res = select i1 %cmp, double %b, double %a
ret double %res
}
; Check the high end of the TM range.
; A byte offset of 4095 is expected to be used directly as the displacement
; of a TM (the next offset up, in f11, is expected to need TMY).
define double @f10(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f10:
; CHECK: tm 4095(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
%ptr = getelementptr i8 *%src, i64 4095
%byte = load i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
ret double %res
}
; Check the low end of the positive TMY range.
; A byte offset of 4096 is one past the largest offset tested with TM (f10),
; so the expected instruction switches to TMY with displacement 4096.
define double @f11(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f11:
; CHECK: tmy 4096(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
%ptr = getelementptr i8 *%src, i64 4096
%byte = load i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
ret double %res
}
; Check the high end of the TMY range.
; A byte offset of 524287 is expected to be used directly as the TMY
; displacement (the next offset up, in f13, is expected to need a separate
; address calculation).
define double @f12(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f12:
; CHECK: tmy 524287(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
%ptr = getelementptr i8 *%src, i64 524287
%byte = load i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
ret double %res
}
; Check the next byte up, which needs separate address logic.
; A byte offset of 524288 is expected to be added to %r2 with AGFI first,
; after which the test is a TM with a zero displacement.
define double @f13(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f13:
; CHECK: agfi %r2, 524288
; CHECK: tm 0(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
%ptr = getelementptr i8 *%src, i64 524288
%byte = load i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
ret double %res
}
; Check the low end of the TMY range.
; A byte offset of -524288 is expected to be used directly as the TMY
; displacement (the next offset down, in f15, is expected to need a separate
; address calculation).
define double @f14(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f14:
; CHECK: tmy -524288(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
%ptr = getelementptr i8 *%src, i64 -524288
%byte = load i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
ret double %res
}
; Check the next byte down, which needs separate address logic.
; A byte offset of -524289 is expected to be added to %r2 with AGFI first,
; after which the test is a TM with a zero displacement.
define double @f15(i8 *%src, double %a, double %b) {
; CHECK-LABEL: f15:
; CHECK: agfi %r2, -524289
; CHECK: tm 0(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
%ptr = getelementptr i8 *%src, i64 -524289
%byte = load i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
ret double %res
}
; Check that TM(Y) does not allow an index.
; The address is %src plus a variable %index; the expected TM uses a single
; base register with a zero displacement rather than a base-plus-index
; operand.
define double @f16(i8 *%src, i64 %index, double %a, double %b) {
; CHECK-LABEL: f16:
; CHECK: tm 0({{%r[1-5]}}), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
%ptr = getelementptr i8 *%src, i64 %index
%byte = load i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
ret double %res
}

View File

@ -7036,6 +7036,27 @@
# CHECK: sy %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x5b
# CHECK: tm 0, 0
0x91 0x00 0x00 0x00
# CHECK: tm 4095, 0
0x91 0x00 0x0f 0xff
# CHECK: tm 0, 255
0x91 0xff 0x00 0x00
# CHECK: tm 0(%r1), 42
0x91 0x2a 0x10 0x00
# CHECK: tm 0(%r15), 42
0x91 0x2a 0xf0 0x00
# CHECK: tm 4095(%r1), 42
0x91 0x2a 0x1f 0xff
# CHECK: tm 4095(%r15), 42
0x91 0x2a 0xff 0xff
# CHECK: tmhh %r0, 0
0xa7 0x02 0x00 0x00
@ -7084,6 +7105,36 @@
# CHECK: tmll %r15, 0
0xa7 0xf1 0x00 0x00
# CHECK: tmy -524288, 0
0xeb 0x00 0x00 0x00 0x80 0x51
# CHECK: tmy -1, 0
0xeb 0x00 0x0f 0xff 0xff 0x51
# CHECK: tmy 0, 0
0xeb 0x00 0x00 0x00 0x00 0x51
# CHECK: tmy 1, 0
0xeb 0x00 0x00 0x01 0x00 0x51
# CHECK: tmy 524287, 0
0xeb 0x00 0x0f 0xff 0x7f 0x51
# CHECK: tmy 0, 255
0xeb 0xff 0x00 0x00 0x00 0x51
# CHECK: tmy 0(%r1), 42
0xeb 0x2a 0x10 0x00 0x00 0x51
# CHECK: tmy 0(%r15), 42
0xeb 0x2a 0xf0 0x00 0x00 0x51
# CHECK: tmy 524287(%r1), 42
0xeb 0x2a 0x1f 0xff 0x7f 0x51
# CHECK: tmy 524287(%r15), 42
0xeb 0x2a 0xff 0xff 0x7f 0x51
# CHECK: xc 0(1), 0
0xd7 0x00 0x00 0x00 0x00 0x00

View File

@ -2951,6 +2951,23 @@
sy %r0, -524289
sy %r0, 524288
#CHECK: error: invalid operand
#CHECK: tm -1, 0
#CHECK: error: invalid operand
#CHECK: tm 4096, 0
#CHECK: error: invalid use of indexed addressing
#CHECK: tm 0(%r1,%r2), 0
#CHECK: error: invalid operand
#CHECK: tm 0, -1
#CHECK: error: invalid operand
#CHECK: tm 0, 256
tm -1, 0
tm 4096, 0
tm 0(%r1,%r2), 0
tm 0, -1
tm 0, 256
#CHECK: error: invalid operand
#CHECK: tmhh %r0, -1
#CHECK: error: invalid operand
@ -2983,6 +3000,23 @@
tmll %r0, -1
tmll %r0, 0x10000
#CHECK: error: invalid operand
#CHECK: tmy -524289, 0
#CHECK: error: invalid operand
#CHECK: tmy 524288, 0
#CHECK: error: invalid use of indexed addressing
#CHECK: tmy 0(%r1,%r2), 0
#CHECK: error: invalid operand
#CHECK: tmy 0, -1
#CHECK: error: invalid operand
#CHECK: tmy 0, 256
tmy -524289, 0
tmy 524288, 0
tmy 0(%r1,%r2), 0
tmy 0, -1
tmy 0, 256
#CHECK: error: invalid operand
#CHECK: x %r0, -1
#CHECK: error: invalid operand

View File

@ -7336,6 +7336,22 @@
sy %r0, 524287(%r15,%r1)
sy %r15, 0
#CHECK: tm 0, 0 # encoding: [0x91,0x00,0x00,0x00]
#CHECK: tm 4095, 0 # encoding: [0x91,0x00,0x0f,0xff]
#CHECK: tm 0, 255 # encoding: [0x91,0xff,0x00,0x00]
#CHECK: tm 0(%r1), 42 # encoding: [0x91,0x2a,0x10,0x00]
#CHECK: tm 0(%r15), 42 # encoding: [0x91,0x2a,0xf0,0x00]
#CHECK: tm 4095(%r1), 42 # encoding: [0x91,0x2a,0x1f,0xff]
#CHECK: tm 4095(%r15), 42 # encoding: [0x91,0x2a,0xff,0xff]
tm 0, 0
tm 4095, 0
tm 0, 255
tm 0(%r1), 42
tm 0(%r15), 42
tm 4095(%r1), 42
tm 4095(%r15), 42
#CHECK: tmhh %r0, 0 # encoding: [0xa7,0x02,0x00,0x00]
#CHECK: tmhh %r0, 32768 # encoding: [0xa7,0x02,0x80,0x00]
#CHECK: tmhh %r0, 65535 # encoding: [0xa7,0x02,0xff,0xff]
@ -7376,6 +7392,28 @@
tmll %r0, 0xffff
tmll %r15, 0
#CHECK: tmy -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0x51]
#CHECK: tmy -1, 0 # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x51]
#CHECK: tmy 0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0x51]
#CHECK: tmy 1, 0 # encoding: [0xeb,0x00,0x00,0x01,0x00,0x51]
#CHECK: tmy 524287, 0 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x51]
#CHECK: tmy 0, 255 # encoding: [0xeb,0xff,0x00,0x00,0x00,0x51]
#CHECK: tmy 0(%r1), 42 # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x51]
#CHECK: tmy 0(%r15), 42 # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x51]
#CHECK: tmy 524287(%r1), 42 # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x51]
#CHECK: tmy 524287(%r15), 42 # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x51]
tmy -524288, 0
tmy -1, 0
tmy 0, 0
tmy 1, 0
tmy 524287, 0
tmy 0, 255
tmy 0(%r1), 42
tmy 0(%r15), 42
tmy 524287(%r1), 42
tmy 524287(%r15), 42
#CHECK: x %r0, 0 # encoding: [0x57,0x00,0x00,0x00]
#CHECK: x %r0, 4095 # encoding: [0x57,0x00,0x0f,0xff]
#CHECK: x %r0, 0(%r1) # encoding: [0x57,0x00,0x10,0x00]