mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-03 01:12:53 +00:00
Make sure that the lower bits on the VSELECT condition are properly set.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@146800 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2027379985
commit
8b99c1e42c
@ -10168,48 +10168,54 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
|
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
|
||||||
}
|
}
|
||||||
if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
|
if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
|
||||||
|
assert((Subtarget->hasSSE2() || Subtarget->hasAVX()) &&
|
||||||
|
"Need SSE2 for pslli/pcmpeq.");
|
||||||
|
|
||||||
// a = a << 5;
|
// a = a << 5;
|
||||||
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||||
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
|
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
|
||||||
Op.getOperand(1), DAG.getConstant(5, MVT::i32));
|
Op.getOperand(1), DAG.getConstant(5, MVT::i32));
|
||||||
|
|
||||||
ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15));
|
// Turn 'a' into a mask suitable for VSELECT
|
||||||
ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63));
|
SDValue VSelM = DAG.getConstant(0x80, VT);
|
||||||
|
SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
|
||||||
|
OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||||
|
DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
|
||||||
|
OpVSel, VSelM);
|
||||||
|
|
||||||
std::vector<Constant*> CVM1(16, CM1);
|
SDValue CM1 = DAG.getConstant(0x0f, VT);
|
||||||
std::vector<Constant*> CVM2(16, CM2);
|
SDValue CM2 = DAG.getConstant(0x3f, VT);
|
||||||
Constant *C = ConstantVector::get(CVM1);
|
|
||||||
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
|
|
||||||
SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
|
|
||||||
MachinePointerInfo::getConstantPool(),
|
|
||||||
false, false, false, 16);
|
|
||||||
|
|
||||||
// r = pblendv(r, psllw(r & (char16)15, 4), a);
|
// r = VSELECT(r, psllw(r & (char16)15, 4), a);
|
||||||
M = DAG.getNode(ISD::AND, dl, VT, R, M);
|
SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
|
||||||
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||||
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
|
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
|
||||||
DAG.getConstant(4, MVT::i32));
|
DAG.getConstant(4, MVT::i32));
|
||||||
R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R);
|
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
|
||||||
|
|
||||||
// a += a
|
// a += a
|
||||||
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
|
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
|
||||||
|
OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
|
||||||
|
OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||||
|
DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
|
||||||
|
OpVSel, VSelM);
|
||||||
|
|
||||||
C = ConstantVector::get(CVM2);
|
// r = VSELECT(r, psllw(r & (char16)63, 2), a);
|
||||||
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
|
M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
|
||||||
M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
|
|
||||||
MachinePointerInfo::getConstantPool(),
|
|
||||||
false, false, false, 16);
|
|
||||||
|
|
||||||
// r = pblendv(r, psllw(r & (char16)63, 2), a);
|
|
||||||
M = DAG.getNode(ISD::AND, dl, VT, R, M);
|
|
||||||
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||||
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
|
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
|
||||||
DAG.getConstant(2, MVT::i32));
|
DAG.getConstant(2, MVT::i32));
|
||||||
R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R);
|
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
|
||||||
|
|
||||||
// a += a
|
// a += a
|
||||||
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
|
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
|
||||||
|
OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
|
||||||
|
OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||||
|
DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
|
||||||
|
OpVSel, VSelM);
|
||||||
|
|
||||||
// return pblendv(r, r+r, a);
|
// return VSELECT(r, r+r, a);
|
||||||
R = DAG.getNode(ISD::VSELECT, dl, VT, Op,
|
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
|
||||||
DAG.getNode(ISD::ADD, dl, VT, R, R), R);
|
DAG.getNode(ISD::ADD, dl, VT, R, R), R);
|
||||||
return R;
|
return R;
|
||||||
}
|
}
|
||||||
|
@ -1,12 +1,19 @@
|
|||||||
; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
|
; RUN: llc -march=x86-64 -mattr=+sse41 < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
|
||||||
|
; RUN: llc -march=x86-64 -mattr=-sse41 < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
|
||||||
; Test case for r146671
|
; Test case for r146671
|
||||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
target triple = "x86_64-apple-macosx10.7"
|
target triple = "x86_64-apple-macosx10.7"
|
||||||
|
|
||||||
define <16 x i8> @shift(<16 x i8> %a, <16 x i8> %b) nounwind {
|
define <16 x i8> @shift(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||||
; CHECK: psllw $4, [[REG:%xmm.]]
|
; Make sure operands to pblend are in the right order.
|
||||||
; CHECK-NEXT: movdqa
|
; CHECK-W-SSE4: psllw $4, [[REG1:%xmm.]]
|
||||||
; CHECK-NEXT: pblendvb [[REG]],{{ %xmm.}}
|
; CHECK-W-SSE4: pblendvb [[REG1]],{{ %xmm.}}
|
||||||
|
; CHECK-W-SSE4: psllw $2
|
||||||
|
|
||||||
|
; Make sure we're masking and pcmp'ing the VSELECT condition vector.
|
||||||
|
; CHECK-WO-SSE4: psllw $5, [[REG1:%xmm.]]
|
||||||
|
; CHECK-WO-SSE4: pand [[REG1]], [[REG2:%xmm.]]
|
||||||
|
; CHECK-WO-SSE4: pcmpeqb {{%xmm., }}[[REG2]]
|
||||||
%1 = shl <16 x i8> %a, %b
|
%1 = shl <16 x i8> %a, %b
|
||||||
ret <16 x i8> %1
|
ret <16 x i8> %1
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user