[X86][SSE] Improve target shuffle mask extraction

Add ability to extract vXi64 'vzext_movl' masks on 32-bit targets

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281834 91177308-0d34-0410-b5e6-96231b3b80d8
commit cbaa900ad9
parent 83d088fe4b
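In the DAG, such a mask appears as X86ISD::VZEXT_MOVL wrapping an ISD::SCALAR_TO_VECTOR of a constant: element 0 carries the constant and every other element is known to be zero. Before this patch, getTargetShuffleMaskIndices in lib/Target/X86/X86ISelLowering.cpp could only decode that shape when the vector's scalar type was a multiple of the requested mask element width (the "split" direction). On 32-bit targets i64 is not legal, so a vXi64 mask constant is typically legalized to 32-bit scalars; the scalar type is then narrower than the 64-bit mask elements and extraction failed. The patch adds the opposite "widen" direction. The following is a minimal standalone sketch of the arithmetic, with a hypothetical helper name (extractMovlMask is not an LLVM API) and plain integers in place of SDValues/APInts:

#include <cstdint>
#include <iostream>
#include <vector>

// Sketch of the two cases distinguished after this patch. Cst is the scalar
// constant inside VZEXT_MOVL(SCALAR_TO_VECTOR(Cst)); VZEXT_MOVL guarantees
// every element above the first is zero, so only mask element 0 carries data.
static bool extractMovlMask(uint64_t Cst, unsigned ScalarBits,
                            unsigned MaskEltBits, unsigned VecBits,
                            std::vector<uint64_t> &RawMask) {
  unsigned NumMaskElts = VecBits / MaskEltBits;
  if (MaskEltBits % ScalarBits == 0) {
    // Widen (the new case): the zero-extended constant is the whole first
    // mask element; the remaining elements are zero.
    RawMask.assign(NumMaskElts, 0);
    RawMask[0] = Cst;
    return true;
  }
  if (ScalarBits % MaskEltBits == 0) {
    // Split (the pre-existing case): slice the constant into
    // MaskEltBits-sized pieces, lowest piece first, then pad with zeros.
    unsigned Split = ScalarBits / MaskEltBits;
    uint64_t PieceMask = (uint64_t(1) << MaskEltBits) - 1;
    for (unsigned I = 0; I != Split; ++I)
      RawMask.push_back((Cst >> (I * MaskEltBits)) & PieceMask);
    RawMask.insert(RawMask.end(), NumMaskElts - Split, 0);
    return true;
  }
  return false; // Neither width divides the other; give up.
}

int main() {
  // The 32-bit vpermilpd case from the tests below: an i32 constant 2 in a
  // v4i32 VZEXT_MOVL node, queried with 64-bit mask elements.
  std::vector<uint64_t> RawMask;
  if (extractMovlMask(/*Cst=*/2, /*ScalarBits=*/32, /*MaskEltBits=*/64,
                      /*VecBits=*/128, RawMask))
    for (uint64_t M : RawMask)
      std::cout << M << ' '; // prints "2 0": the <2,0> vpermilpd mask
  std::cout << '\n';
}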
@@ -4763,6 +4763,7 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
   MVT VT = MaskNode.getSimpleValueType();
   assert(VT.isVector() && "Can't produce a non-vector with a build_vector!");
+  unsigned NumMaskElts = VT.getSizeInBits() / MaskEltSizeInBits;
 
   // Split an APInt element into MaskEltSizeInBits sized pieces and
   // insert into the shuffle mask.
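The element count is hoisted to the top of the function so that both the new VZEXT_MOVL widening path and the all-zeros build-vector path (final C++ hunk below) can reuse it.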
@@ -4794,18 +4795,21 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
   if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
       MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
 
-    // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
-    if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
-      return false;
-    unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
-
     SDValue MaskOp = MaskNode.getOperand(0).getOperand(0);
     if (auto *CN = dyn_cast<ConstantSDNode>(MaskOp)) {
+      if ((MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0) {
+        RawMask.push_back(CN->getZExtValue());
+        RawMask.append(NumMaskElts - 1, 0);
+        return true;
+      }
+
+      if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0) {
+        unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
         SplitElementToMask(CN->getAPIntValue());
         RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0);
         return true;
       }
+    }
     return false;
   }
 
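This is the branch the X32 tests below newly exercise: a v4i32 mask node queried with MaskEltSizeInBits = 64 satisfies (64 % 32) == 0, so the zero-extended constant becomes mask element 0 and the remaining NumMaskElts - 1 elements are zero, matching VZEXT_MOVL's zeroing semantics. The old code rejected exactly this shape at the (VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0 early-out; the new case also resolves the old TODO. The split path is unchanged apart from moving inside the ConstantSDNode check, so a non-constant operand still falls through to the same return false.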
@@ -4815,7 +4819,7 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
   // We can always decode if the buildvector is all zero constants,
   // but can't use isBuildVectorAllZeros as it might contain UNDEFs.
   if (all_of(MaskNode->ops(), X86::isZeroNode)) {
-    RawMask.append(VT.getSizeInBits() / MaskEltSizeInBits, 0);
+    RawMask.append(NumMaskElts, 0);
     return true;
   }
 
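The appended count is the same value as before; it now reuses the NumMaskElts hoisted in the first hunk instead of recomputing VT.getSizeInBits() / MaskEltSizeInBits.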
@@ -258,10 +258,6 @@ define <8 x float> @combine_vpermilvar_8f32_movsldup(<8 x float> %a0) {
 define <2 x double> @combine_vpermilvar_2f64_identity(<2 x double> %a0) {
 ; X32-LABEL: combine_vpermilvar_2f64_identity:
 ; X32:       # BB#0:
-; X32-NEXT:    movl $2, %eax
-; X32-NEXT:    vmovd %eax, %xmm1
-; X32-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
-; X32-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vpermilvar_2f64_identity:
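The remaining hunks update the regression tests; the page does not preserve file paths, but the function names suggest the AVX and XOP vector-shuffle-combining tests under test/CodeGen/X86. In this first test, each vpermilpd with mask <2,0> swaps the two doubles, so the pair is an identity; now that the mask can be decoded on X32, the combiner deletes both shuffles, where previously it had to materialize $2 in a register and execute them.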
@@ -365,10 +361,7 @@ define <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) {
 define <2 x double> @constant_fold_vpermilvar_pd() {
 ; X32-LABEL: constant_fold_vpermilvar_pd:
 ; X32:       # BB#0:
-; X32-NEXT:    movl $2, %eax
-; X32-NEXT:    vmovd %eax, %xmm0
-; X32-NEXT:    vmovapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
-; X32-NEXT:    vpermilpd %xmm0, %xmm1, %xmm0
+; X32-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: constant_fold_vpermilvar_pd:
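Constant folding also starts working on X32: permuting the constant <1.0, 2.0> vector now collapses to a single memory-operand vpermilpd performing the [1,0] swap, instead of loading the constant and building the mask in registers at run time.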
@@ -15,10 +15,7 @@ declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
 define <2 x double> @combine_vpermil2pd_identity(<2 x double> %a0, <2 x double> %a1) {
 ; X32-LABEL: combine_vpermil2pd_identity:
 ; X32:       # BB#0:
-; X32-NEXT:    movl $2, %eax
-; X32-NEXT:    vmovd %eax, %xmm2
-; X32-NEXT:    vpermil2pd $0, %xmm2, %xmm0, %xmm1, %xmm0
-; X32-NEXT:    vpermil2pd $0, %xmm2, %xmm0, %xmm0, %xmm0
+; X32-NEXT:    vmovaps %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vpermil2pd_identity:
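Likewise for XOP: the two vpermil2pd operations cancel, so the X32 identity test now reduces to a single vmovaps that returns %a1.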