mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-27 07:12:06 +00:00
[SelectionDAG] Add BUILD_VECTOR support to computeKnownBits and SimplifyDemandedBits
Add the ability for computeKnownBits and SimplifyDemandedBits to extract the known zero/one bits from a BUILD_VECTOR node, returning only the known bits that are shared by every vector element. This is an initial step towards determining the sign bits of a vector (PR29079). Differential Revision: https://reviews.llvm.org/D24253. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280927 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
707719e625
commit
d88990b028
@ -2016,6 +2016,26 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
|
||||
KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
|
||||
KnownZero = ~KnownOne;
|
||||
break;
|
||||
case ISD::BUILD_VECTOR:
|
||||
// Collect the known bits that are shared by every vector element.
|
||||
KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
|
||||
for (SDValue SrcOp : Op->ops()) {
|
||||
computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1);
|
||||
|
||||
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
|
||||
if (SrcOp.getValueSizeInBits() != BitWidth) {
|
||||
assert(SrcOp.getValueSizeInBits() > BitWidth &&
|
||||
"Expected BUILD_VECTOR implicit truncation");
|
||||
KnownOne2 = KnownOne2.trunc(BitWidth);
|
||||
KnownZero2 = KnownZero2.trunc(BitWidth);
|
||||
}
|
||||
|
||||
// Known bits are the values that are shared by every element.
|
||||
// TODO: support per-element known bits.
|
||||
KnownOne &= KnownOne2;
|
||||
KnownZero &= KnownZero2;
|
||||
}
|
||||
break;
|
||||
case ISD::AND:
|
||||
// If either the LHS or the RHS are Zero, the result is zero.
|
||||
computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
|
||||
|
@ -468,6 +468,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
|
||||
KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
|
||||
KnownZero = ~KnownOne;
|
||||
return false; // Don't fall through, will infinitely loop.
|
||||
case ISD::BUILD_VECTOR:
|
||||
// Collect the known bits that are shared by every constant vector element.
|
||||
KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
|
||||
for (SDValue SrcOp : Op->ops()) {
|
||||
if (!isa<ConstantSDNode>(SrcOp)) {
|
||||
// We can only handle all constant values - bail out with no known bits.
|
||||
KnownZero = KnownOne = APInt(BitWidth, 0);
|
||||
return false;
|
||||
}
|
||||
KnownOne2 = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
|
||||
KnownZero2 = ~KnownOne2;
|
||||
|
||||
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
|
||||
if (KnownOne2.getBitWidth() != BitWidth) {
|
||||
assert(KnownOne2.getBitWidth() > BitWidth &&
|
||||
KnownZero2.getBitWidth() > BitWidth &&
|
||||
"Expected BUILD_VECTOR implicit truncation");
|
||||
KnownOne2 = KnownOne2.trunc(BitWidth);
|
||||
KnownZero2 = KnownZero2.trunc(BitWidth);
|
||||
}
|
||||
|
||||
// Known bits are the values that are shared by every element.
|
||||
// TODO: support per-element known bits.
|
||||
KnownOne &= KnownOne2;
|
||||
KnownZero &= KnownZero2;
|
||||
}
|
||||
return false; // Don't fall through, will infinitely loop.
|
||||
case ISD::AND:
|
||||
// If the RHS is a constant, check to see if the LHS would be zero without
|
||||
// using the bits from the RHS. Below, we use knowledge about the RHS to
|
||||
|
@ -138,7 +138,7 @@ define void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x
|
||||
; v2i16 is naturally 4 byte aligned
|
||||
; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
|
||||
; TODO: This should use DST, but for some reason there are redundant MOVs
|
||||
; EG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
|
||||
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
|
||||
; EG: 16
|
||||
define void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <2 x i16>, <2 x i16> addrspace(2)* %in
|
||||
@ -212,9 +212,10 @@ entry:
|
||||
; v4i16 is naturally 8 byte aligned
|
||||
; EG: VTX_READ_64 [[DST:T[0-9]\.XY]], {{T[0-9].[XYZW]}}, 0, #1
|
||||
; TODO: These should use DST, but for some reason there are redundant MOVs
|
||||
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
|
||||
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
|
||||
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
|
||||
; EG-DAG: 16
|
||||
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
|
||||
; EG-DAG: AND_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
|
||||
; EG-DAG: 16
|
||||
define void @constant_constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <4 x i16>, <4 x i16> addrspace(2)* %in
|
||||
|
@ -147,7 +147,7 @@ define void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i
|
||||
|
||||
; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
|
||||
; TODO: This should use DST, but for some reason there are redundant MOVs
|
||||
; EG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
|
||||
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
|
||||
; EG: 16
|
||||
define void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
|
||||
%load = load <2 x i16>, <2 x i16> addrspace(1)* %in
|
||||
@ -219,9 +219,10 @@ entry:
|
||||
|
||||
; EG: VTX_READ_64 [[DST:T[0-9]\.XY]], {{T[0-9].[XYZW]}}, 0, #1
|
||||
; TODO: These should use DST, but for some reason there are redundant MOVs
|
||||
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
|
||||
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
|
||||
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
|
||||
; EG-DAG: 16
|
||||
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
|
||||
; EG-DAG: AND_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
|
||||
; EG-DAG: 16
|
||||
define void @global_global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
|
||||
%load = load <4 x i16>, <4 x i16> addrspace(1)* %in
|
||||
|
@ -210,10 +210,7 @@ define <4 x i32> @and_or_v4i32(<4 x i32> %a0) {
|
||||
define <2 x i64> @and_or_zext_v2i32(<2 x i32> %a0) {
|
||||
; CHECK-LABEL: and_or_zext_v2i32:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pxor %xmm1, %xmm1
|
||||
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
|
||||
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%1 = zext <2 x i32> %a0 to <2 x i64>
|
||||
%2 = or <2 x i64> %1, <i64 1, i64 1>
|
||||
@ -224,10 +221,7 @@ define <2 x i64> @and_or_zext_v2i32(<2 x i32> %a0) {
|
||||
define <4 x i32> @and_or_zext_v4i16(<4 x i16> %a0) {
|
||||
; CHECK-LABEL: and_or_zext_v4i16:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pxor %xmm1, %xmm1
|
||||
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
|
||||
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%1 = zext <4 x i16> %a0 to <4 x i32>
|
||||
%2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
|
||||
|
Loading…
x
Reference in New Issue
Block a user