mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-15 07:59:57 +00:00
[X86] Fix DecodeVPERMVMask to handle cases where the constant pool entry has a different type than the shuffle itself.
This is especially important for 32-bit targets with 64-bit shuffle elements.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284453 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3c59fd410e
commit
63ae3007f1
@ -5091,7 +5091,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (auto *C = getTargetConstantFromNode(MaskNode)) {
|
if (auto *C = getTargetConstantFromNode(MaskNode)) {
|
||||||
DecodeVPERMVMask(C, VT, Mask);
|
DecodeVPERMVMask(C, MaskEltSize, Mask);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -275,38 +275,32 @@ void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DecodeVPERMVMask(const Constant *C, MVT VT,
|
void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
|
||||||
SmallVectorImpl<int> &ShuffleMask) {
|
SmallVectorImpl<int> &ShuffleMask) {
|
||||||
Type *MaskTy = C->getType();
|
Type *MaskTy = C->getType();
|
||||||
if (MaskTy->isVectorTy()) {
|
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
|
||||||
unsigned NumElements = MaskTy->getVectorNumElements();
|
(void)MaskTySize;
|
||||||
if (NumElements == VT.getVectorNumElements()) {
|
assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
|
||||||
unsigned EltMaskSize = Log2_64(NumElements);
|
"Unexpected vector size.");
|
||||||
for (unsigned i = 0; i < NumElements; ++i) {
|
assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
|
||||||
Constant *COp = C->getAggregateElement(i);
|
"Unexpected vector element size.");
|
||||||
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) {
|
|
||||||
ShuffleMask.clear();
|
// The shuffle mask requires elements the same size as the target.
|
||||||
return;
|
SmallBitVector UndefElts;
|
||||||
}
|
SmallVector<uint64_t, 8> RawMask;
|
||||||
if (isa<UndefValue>(COp))
|
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
|
||||||
ShuffleMask.push_back(SM_SentinelUndef);
|
return;
|
||||||
else {
|
|
||||||
APInt Element = cast<ConstantInt>(COp)->getValue();
|
unsigned NumElts = RawMask.size();
|
||||||
Element = Element.getLoBits(EltMaskSize);
|
|
||||||
ShuffleMask.push_back(Element.getZExtValue());
|
for (unsigned i = 0; i != NumElts; ++i) {
|
||||||
}
|
if (UndefElts[i]) {
|
||||||
}
|
ShuffleMask.push_back(SM_SentinelUndef);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
return;
|
int Index = RawMask[i] & (NumElts - 1);
|
||||||
|
ShuffleMask.push_back(Index);
|
||||||
}
|
}
|
||||||
// Scalar value; just broadcast it
|
|
||||||
if (!isa<ConstantInt>(C))
|
|
||||||
return;
|
|
||||||
uint64_t Element = cast<ConstantInt>(C)->getZExtValue();
|
|
||||||
int NumElements = VT.getVectorNumElements();
|
|
||||||
Element &= (1 << NumElements) - 1;
|
|
||||||
for (int i = 0; i < NumElements; ++i)
|
|
||||||
ShuffleMask.push_back(Element);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
|
void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
|
||||||
|
@ -40,7 +40,7 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned MatchImm, unsigned ElSize,
|
|||||||
void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
|
void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
|
||||||
|
|
||||||
/// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
|
/// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
|
||||||
void DecodeVPERMVMask(const Constant *C, MVT VT,
|
void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
|
||||||
SmallVectorImpl<int> &ShuffleMask);
|
SmallVectorImpl<int> &ShuffleMask);
|
||||||
|
|
||||||
/// Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
|
/// Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
|
||||||
|
@ -28,10 +28,6 @@ declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16
|
|||||||
define <8 x double> @combine_permvar_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
|
define <8 x double> @combine_permvar_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
|
||||||
; X32-LABEL: combine_permvar_8f64_identity:
|
; X32-LABEL: combine_permvar_8f64_identity:
|
||||||
; X32: # BB#0:
|
; X32: # BB#0:
|
||||||
; X32-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
|
|
||||||
; X32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
|
|
||||||
; X32-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
|
|
||||||
; X32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
|
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: combine_permvar_8f64_identity:
|
; X64-LABEL: combine_permvar_8f64_identity:
|
||||||
@ -703,8 +699,7 @@ define <8 x i64> @combine_permvar_8i64_as_permq_mask(<8 x i64> %x0, <8 x i64> %x
|
|||||||
define <8 x double> @combine_permvar_8f64_as_permpd(<8 x double> %x0, <8 x double> %x1) {
|
define <8 x double> @combine_permvar_8f64_as_permpd(<8 x double> %x0, <8 x double> %x1) {
|
||||||
; X32-LABEL: combine_permvar_8f64_as_permpd:
|
; X32-LABEL: combine_permvar_8f64_as_permpd:
|
||||||
; X32: # BB#0:
|
; X32: # BB#0:
|
||||||
; X32-NEXT: vmovapd {{.*#+}} zmm1 = <3,0,2,0,1,0,u,u,u,u,6,0,5,0,4,0>
|
; X32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
|
||||||
; X32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
|
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: combine_permvar_8f64_as_permpd:
|
; X64-LABEL: combine_permvar_8f64_as_permpd:
|
||||||
@ -719,8 +714,7 @@ define <8 x double> @combine_permvar_8f64_as_permpd_mask(<8 x double> %x0, <8 x
|
|||||||
; X32: # BB#0:
|
; X32: # BB#0:
|
||||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||||
; X32-NEXT: kmovd %eax, %k1
|
; X32-NEXT: kmovd %eax, %k1
|
||||||
; X32-NEXT: vmovapd {{.*#+}} zmm2 = <3,0,2,0,1,0,u,u,u,u,6,0,5,0,4,0>
|
; X32-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
|
||||||
; X32-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1}
|
|
||||||
; X32-NEXT: vmovapd %zmm1, %zmm0
|
; X32-NEXT: vmovapd %zmm1, %zmm0
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
|
Loading…
Reference in New Issue
Block a user