mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-17 00:27:31 +00:00
Additional fixes for bug 15155.
This handles the cases where the 6-bit splat element is odd, converting to a three-instruction sequence to add or subtract two splats. With this fix, the XFAIL in test/CodeGen/PowerPC/vec_constants.ll is removed.
llvm-svn: 175663
This commit is contained in:
parent
95d35d8c5e
commit
bcb4fa48fa
@ -1323,34 +1323,75 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
|
||||
SDValue(Tmp, 0), GA);
|
||||
}
|
||||
case PPCISD::VADD_SPLAT: {
|
||||
// Convert: VADD_SPLAT elt, size
|
||||
// Into: tmp = VSPLTIS[BHW] elt
|
||||
// VADDU[BHW]M tmp, tmp
|
||||
// Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
|
||||
// This expands into one of three sequences, depending on whether
|
||||
// the first operand is odd or even, positive or negative.
|
||||
assert(isa<ConstantSDNode>(N->getOperand(0)) &&
|
||||
isa<ConstantSDNode>(N->getOperand(1)) &&
|
||||
"Invalid operand on VADD_SPLAT!");
|
||||
|
||||
int Elt = N->getConstantOperandVal(0);
|
||||
int EltSize = N->getConstantOperandVal(1);
|
||||
unsigned Opc1, Opc2;
|
||||
unsigned Opc1, Opc2, Opc3;
|
||||
EVT VT;
|
||||
|
||||
if (EltSize == 1) {
|
||||
Opc1 = PPC::VSPLTISB;
|
||||
Opc2 = PPC::VADDUBM;
|
||||
Opc3 = PPC::VSUBUBM;
|
||||
VT = MVT::v16i8;
|
||||
} else if (EltSize == 2) {
|
||||
Opc1 = PPC::VSPLTISH;
|
||||
Opc2 = PPC::VADDUHM;
|
||||
Opc3 = PPC::VSUBUHM;
|
||||
VT = MVT::v8i16;
|
||||
} else {
|
||||
assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
|
||||
Opc1 = PPC::VSPLTISW;
|
||||
Opc2 = PPC::VADDUWM;
|
||||
Opc3 = PPC::VSUBUWM;
|
||||
VT = MVT::v4i32;
|
||||
}
|
||||
SDValue Elt = getI32Imm(N->getConstantOperandVal(0));
|
||||
SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, Elt);
|
||||
SDValue TmpVal = SDValue(Tmp, 0);
|
||||
return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
|
||||
|
||||
if ((Elt & 1) == 0) {
|
||||
// Elt is even, in the range [-32,-18] or [16,30].
|
||||
//
|
||||
// Convert: VADD_SPLAT elt, size
|
||||
// Into: tmp = VSPLTIS[BHW] elt
|
||||
// VADDU[BHW]M tmp, tmp
|
||||
// Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
|
||||
SDValue EltVal = getI32Imm(Elt >> 1);
|
||||
SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
|
||||
SDValue TmpVal = SDValue(Tmp, 0);
|
||||
return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
|
||||
|
||||
} else if (Elt > 0) {
|
||||
// Elt is odd and positive, in the range [17,31].
|
||||
//
|
||||
// Convert: VADD_SPLAT elt, size
|
||||
// Into: tmp1 = VSPLTIS[BHW] elt-16
|
||||
// tmp2 = VSPLTIS[BHW] -16
|
||||
// VSUBU[BHW]M tmp1, tmp2
|
||||
SDValue EltVal = getI32Imm(Elt - 16);
|
||||
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
|
||||
EltVal = getI32Imm(-16);
|
||||
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
|
||||
return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
|
||||
SDValue(Tmp2, 0));
|
||||
|
||||
} else {
|
||||
// Elt is odd and negative, in the range [-31,-17].
|
||||
//
|
||||
// Convert: VADD_SPLAT elt, size
|
||||
// Into: tmp1 = VSPLTIS[BHW] elt+16
|
||||
// tmp2 = VSPLTIS[BHW] -16
|
||||
// VADDU[BHW]M tmp1, tmp2
|
||||
SDValue EltVal = getI32Imm(Elt + 16);
|
||||
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
|
||||
EltVal = getI32Imm(-16);
|
||||
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
|
||||
return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
|
||||
SDValue(Tmp2, 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5025,11 +5025,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
||||
// Two instruction sequences.
|
||||
|
||||
// If this value is in the range [-32,30] and is even, use:
|
||||
// tmp = VSPLTI[bhw], result = add tmp, tmp
|
||||
if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
|
||||
// To avoid having the optimization undone by constant folding, we
|
||||
// convert to a pseudo that will be expanded later.
|
||||
SDValue Elt = DAG.getConstant(SextVal >> 1, MVT::i32);
|
||||
// VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
|
||||
// If this value is in the range [17,31] and is odd, use:
|
||||
// VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
|
||||
// If this value is in the range [-31,-17] and is odd, use:
|
||||
// VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
|
||||
// Note the last two are three-instruction sequences.
|
||||
if (SextVal >= -32 && SextVal <= 31) {
|
||||
// To avoid having these optimizations undone by constant folding,
|
||||
// we convert to a pseudo that will be expanded later into one of
|
||||
// the above forms.
|
||||
SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
|
||||
EVT VT = Op.getValueType();
|
||||
int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
|
||||
SDValue EltSize = DAG.getConstant(Size, MVT::i32);
|
||||
@ -5129,25 +5135,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
||||
}
|
||||
}
|
||||
|
||||
// Three instruction sequences.
|
||||
|
||||
// Odd, in range [17,31]: (vsplti C)-(vsplti -16).
|
||||
// FIXME: Disabled because the add gets constant folded.
|
||||
if (0 && SextVal >= 0 && SextVal <= 31) {
|
||||
SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
|
||||
SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
|
||||
LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
|
||||
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
|
||||
}
|
||||
// Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
|
||||
// FIXME: Disabled because the add gets constant folded.
|
||||
if (0 && SextVal >= -31 && SextVal <= 0) {
|
||||
SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
|
||||
SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
|
||||
LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
|
||||
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -238,8 +238,9 @@ namespace llvm {
|
||||
ADDI_DTPREL_L,
|
||||
|
||||
/// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
|
||||
/// into an ADD of a VSPLTI with itself during instruction selection.
|
||||
/// Necessary to avoid losing this optimization due to constant folds.
|
||||
/// during instruction selection to optimize a BUILD_VECTOR into
|
||||
/// operations on splats. This is necessary to avoid losing these
|
||||
/// optimizations due to constant folding.
|
||||
VADD_SPLAT,
|
||||
|
||||
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -O0 -mcpu=pwr7 <%s | FileCheck %s
|
||||
|
||||
; Test optimization of build_vector into vadd/vsplt for 6-bit immediates.
|
||||
; Test optimizations of build_vector for 6-bit immediates.
|
||||
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
@ -9,69 +9,141 @@ target triple = "powerpc64-unknown-linux-gnu"
|
||||
%v8i16 = type <8 x i16>
|
||||
%v16i8 = type <16 x i8>
|
||||
|
||||
define void @test_v4i32_pos(%v4i32* %P, %v4i32* %S) {
|
||||
define void @test_v4i32_pos_even(%v4i32* %P, %v4i32* %S) {
|
||||
%p = load %v4i32* %P
|
||||
%r = add %v4i32 %p, < i32 18, i32 18, i32 18, i32 18 >
|
||||
store %v4i32 %r, %v4i32* %S
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_v4i32_pos:
|
||||
; CHECK: test_v4i32_pos_even:
|
||||
; CHECK: vspltisw [[REG1:[0-9]+]], 9
|
||||
; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
|
||||
|
||||
define void @test_v4i32_neg(%v4i32* %P, %v4i32* %S) {
|
||||
define void @test_v4i32_neg_even(%v4i32* %P, %v4i32* %S) {
|
||||
%p = load %v4i32* %P
|
||||
%r = add %v4i32 %p, < i32 -28, i32 -28, i32 -28, i32 -28 >
|
||||
store %v4i32 %r, %v4i32* %S
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_v4i32_neg:
|
||||
; CHECK: test_v4i32_neg_even:
|
||||
; CHECK: vspltisw [[REG1:[0-9]+]], -14
|
||||
; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
|
||||
|
||||
define void @test_v8i16_pos(%v8i16* %P, %v8i16* %S) {
|
||||
define void @test_v8i16_pos_even(%v8i16* %P, %v8i16* %S) {
|
||||
%p = load %v8i16* %P
|
||||
%r = add %v8i16 %p, < i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30 >
|
||||
store %v8i16 %r, %v8i16* %S
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_v8i16_pos:
|
||||
; CHECK: test_v8i16_pos_even:
|
||||
; CHECK: vspltish [[REG1:[0-9]+]], 15
|
||||
; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
|
||||
|
||||
define void @test_v8i16_neg(%v8i16* %P, %v8i16* %S) {
|
||||
define void @test_v8i16_neg_even(%v8i16* %P, %v8i16* %S) {
|
||||
%p = load %v8i16* %P
|
||||
%r = add %v8i16 %p, < i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32 >
|
||||
store %v8i16 %r, %v8i16* %S
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_v8i16_neg:
|
||||
; CHECK: test_v8i16_neg_even:
|
||||
; CHECK: vspltish [[REG1:[0-9]+]], -16
|
||||
; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
|
||||
|
||||
define void @test_v16i8_pos(%v16i8* %P, %v16i8* %S) {
|
||||
define void @test_v16i8_pos_even(%v16i8* %P, %v16i8* %S) {
|
||||
%p = load %v16i8* %P
|
||||
%r = add %v16i8 %p, < i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16 >
|
||||
store %v16i8 %r, %v16i8* %S
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_v16i8_pos:
|
||||
; CHECK: test_v16i8_pos_even:
|
||||
; CHECK: vspltisb [[REG1:[0-9]+]], 8
|
||||
; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
|
||||
|
||||
define void @test_v16i8_neg(%v16i8* %P, %v16i8* %S) {
|
||||
define void @test_v16i8_neg_even(%v16i8* %P, %v16i8* %S) {
|
||||
%p = load %v16i8* %P
|
||||
%r = add %v16i8 %p, < i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18 >
|
||||
store %v16i8 %r, %v16i8* %S
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_v16i8_neg:
|
||||
; CHECK: test_v16i8_neg_even:
|
||||
; CHECK: vspltisb [[REG1:[0-9]+]], -9
|
||||
; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
|
||||
|
||||
define void @test_v4i32_pos_odd(%v4i32* %P, %v4i32* %S) {
|
||||
%p = load %v4i32* %P
|
||||
%r = add %v4i32 %p, < i32 27, i32 27, i32 27, i32 27 >
|
||||
store %v4i32 %r, %v4i32* %S
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_v4i32_pos_odd:
|
||||
; CHECK: vspltisw [[REG2:[0-9]+]], -16
|
||||
; CHECK: vspltisw [[REG1:[0-9]+]], 11
|
||||
; CHECK: vsubuwm {{[0-9]+}}, [[REG1]], [[REG2]]
|
||||
|
||||
define void @test_v4i32_neg_odd(%v4i32* %P, %v4i32* %S) {
|
||||
%p = load %v4i32* %P
|
||||
%r = add %v4i32 %p, < i32 -27, i32 -27, i32 -27, i32 -27 >
|
||||
store %v4i32 %r, %v4i32* %S
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_v4i32_neg_odd:
|
||||
; CHECK: vspltisw [[REG2:[0-9]+]], -16
|
||||
; CHECK: vspltisw [[REG1:[0-9]+]], -11
|
||||
; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG2]]
|
||||
|
||||
define void @test_v8i16_pos_odd(%v8i16* %P, %v8i16* %S) {
|
||||
%p = load %v8i16* %P
|
||||
%r = add %v8i16 %p, < i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31 >
|
||||
store %v8i16 %r, %v8i16* %S
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_v8i16_pos_odd:
|
||||
; CHECK: vspltish [[REG2:[0-9]+]], -16
|
||||
; CHECK: vspltish [[REG1:[0-9]+]], 15
|
||||
; CHECK: vsubuhm {{[0-9]+}}, [[REG1]], [[REG2]]
|
||||
|
||||
define void @test_v8i16_neg_odd(%v8i16* %P, %v8i16* %S) {
|
||||
%p = load %v8i16* %P
|
||||
%r = add %v8i16 %p, < i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31 >
|
||||
store %v8i16 %r, %v8i16* %S
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_v8i16_neg_odd:
|
||||
; CHECK: vspltish [[REG2:[0-9]+]], -16
|
||||
; CHECK: vspltish [[REG1:[0-9]+]], -15
|
||||
; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG2]]
|
||||
|
||||
define void @test_v16i8_pos_odd(%v16i8* %P, %v16i8* %S) {
|
||||
%p = load %v16i8* %P
|
||||
%r = add %v16i8 %p, < i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17 >
|
||||
store %v16i8 %r, %v16i8* %S
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_v16i8_pos_odd:
|
||||
; CHECK: vspltisb [[REG2:[0-9]+]], -16
|
||||
; CHECK: vspltisb [[REG1:[0-9]+]], 1
|
||||
; CHECK: vsububm {{[0-9]+}}, [[REG1]], [[REG2]]
|
||||
|
||||
define void @test_v16i8_neg_odd(%v16i8* %P, %v16i8* %S) {
|
||||
%p = load %v16i8* %P
|
||||
%r = add %v16i8 %p, < i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17 >
|
||||
store %v16i8 %r, %v16i8* %S
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test_v16i8_neg_odd:
|
||||
; CHECK: vspltisb [[REG2:[0-9]+]], -16
|
||||
; CHECK: vspltisb [[REG1:[0-9]+]], -1
|
||||
; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG2]]
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s
|
||||
; XFAIL: *
|
||||
|
||||
define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
|
||||
%tmp = load <4 x i32>* %P1 ; <<4 x i32>> [#uses=1]
|
||||
|
Loading…
Reference in New Issue
Block a user