[x86] Teach the instruction lowering to add comments describing constant
pool data being loaded into a vector register.

The comments take the form of:

  # ymm0 = [a,b,c,d,...]
  # xmm1 = <x,y,z...>

The []s are used for generic sequential data and the <>s are used
specifically for ConstantVector loads. Undef elements are printed as
the letter 'u', integers in decimal, and floating point values as
floating point literals. Suggestions on improving the formatting or
other aspects of the display are very welcome.
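
For example, a load of a hypothetical constant pool entry (the .LCPI
labels and element values below are made up purely for illustration)
would be printed as:

  vmovaps .LCPI0_0(%rip), %xmm0 # xmm0 = [1,2,3,4]
  vmovdqa .LCPI0_1(%rip), %ymm1 # ymm1 = <u,0,u,1,u,2,u,3>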

My primary use case for this is to be able to FileCheck test masks
passed to vector shuffle instructions in-register. It isn't fantastic
for that (it doesn't decode special zeroing semantics or other tricks),
but it at least puts the mask onto an instruction line that can
reasonably be checked. I've updated many of the new vector shuffle
lowering tests to leverage this so that we're actually checking that
the shuffle masks remain as expected.
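
For example, one of the updated tests below can now match the vpermd
mask directly from the comment rather than from an opaque constant pool
label:

  ; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,0,0,0,0,0,1,0]
  ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0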

Before implementing this, I tried a *bunch* of different approaches.
I looked into teaching the MCInstLower code to scan up the basic block
and find a definition of a register used in a shuffle instruction and
then decode that, but this seems incredibly brittle and complex.
I talked to Hal a lot about the "right" way to do this: attach the raw
shuffle mask to the instruction itself in some form of unencoded
operands, and then use that to emit the comments. I still think that's
the optimal solution, but it proved to be beyond what I'm up for here.
In particular, it seems best done by completing the plumbing of
metadata through these layers and attaching the shuffle mask in
metadata, which could be dropped fully automatically when encoding an
actual instruction.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218377 91177308-0d34-0410-b5e6-96231b3b80d8
Chandler Carruth 2014-09-24 09:39:41 +00:00
parent 35fdc092e0
commit 10cd8098a7
4 changed files with 171 additions and 105 deletions

lib/Target/X86/X86MCInstLower.cpp

@@ -864,25 +864,23 @@ PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
   return --MBBI;
 }
 
-static const Constant *getShuffleMaskConstant(const MachineInstr &MI,
-                                              const MachineOperand &DstOp,
-                                              const MachineOperand &SrcOp,
-                                              const MachineOperand &MaskOp) {
-  if (!MaskOp.isCPI())
+static const Constant *getConstantFromPool(const MachineInstr &MI,
+                                           const MachineOperand &Op) {
+  if (!Op.isCPI())
     return nullptr;
 
   ArrayRef<MachineConstantPoolEntry> Constants =
       MI.getParent()->getParent()->getConstantPool()->getConstants();
-  const MachineConstantPoolEntry &MaskConstantEntry =
-      Constants[MaskOp.getIndex()];
+  const MachineConstantPoolEntry &ConstantEntry =
+      Constants[Op.getIndex()];
 
   // Bail if this is a machine constant pool entry, we won't be able to dig out
   // anything useful.
-  if (MaskConstantEntry.isMachineConstantPoolEntry())
+  if (ConstantEntry.isMachineConstantPoolEntry())
     return nullptr;
 
-  auto *C = dyn_cast<Constant>(MaskConstantEntry.Val.ConstVal);
-  assert((!C || MaskConstantEntry.getType() == C->getType()) &&
+  auto *C = dyn_cast<Constant>(ConstantEntry.Val.ConstVal);
+  assert((!C || ConstantEntry.getType() == C->getType()) &&
          "Expected a constant of the same type!");
   return C;
 }
@@ -1109,7 +1107,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const MachineOperand &SrcOp = MI->getOperand(1);
     const MachineOperand &MaskOp = MI->getOperand(5);
 
-    if (auto *C = getShuffleMaskConstant(*MI, DstOp, SrcOp, MaskOp)) {
+    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
       SmallVector<int, 16> Mask;
       DecodePSHUFBMask(C, Mask);
       if (!Mask.empty())
@@ -1129,7 +1127,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const MachineOperand &SrcOp = MI->getOperand(1);
    const MachineOperand &MaskOp = MI->getOperand(5);
 
-    if (auto *C = getShuffleMaskConstant(*MI, DstOp, SrcOp, MaskOp)) {
+    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
       SmallVector<int, 16> Mask;
       DecodeVPERMILPMask(C, Mask);
       if (!Mask.empty())
@@ -1137,6 +1135,74 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
     break;
   }
+
+  // For loads from a constant pool to a vector register, print the constant
+  // loaded.
+  case X86::MOVAPDrm:
+  case X86::VMOVAPDrm:
+  case X86::VMOVAPDYrm:
+  case X86::MOVUPDrm:
+  case X86::VMOVUPDrm:
+  case X86::VMOVUPDYrm:
+  case X86::MOVAPSrm:
+  case X86::VMOVAPSrm:
+  case X86::VMOVAPSYrm:
+  case X86::MOVUPSrm:
+  case X86::VMOVUPSrm:
+  case X86::VMOVUPSYrm:
+  case X86::MOVDQArm:
+  case X86::VMOVDQArm:
+  case X86::VMOVDQAYrm:
+  case X86::MOVDQUrm:
+  case X86::VMOVDQUrm:
+  case X86::VMOVDQUYrm:
+    if (!OutStreamer.isVerboseAsm())
+      break;
+    if (MI->getNumOperands() > 4)
+      if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
+        std::string Comment;
+        raw_string_ostream CS(Comment);
+        const MachineOperand &DstOp = MI->getOperand(0);
+        CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
+        if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+          CS << "[";
+          for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
+            if (i != 0)
+              CS << ",";
+            if (CDS->getElementType()->isIntegerTy())
+              CS << CDS->getElementAsInteger(i);
+            else if (CDS->getElementType()->isFloatTy())
+              CS << CDS->getElementAsFloat(i);
+            else if (CDS->getElementType()->isDoubleTy())
+              CS << CDS->getElementAsDouble(i);
+            else
+              CS << "?";
+          }
+          CS << "]";
+          OutStreamer.AddComment(CS.str());
+        } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
+          CS << "<";
+          for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
+            if (i != 0)
+              CS << ",";
+            Constant *COp = CV->getOperand(i);
+            if (isa<UndefValue>(COp)) {
+              CS << "u";
+            } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
+              CS << CI->getZExtValue();
+            } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
+              SmallString<32> Str;
+              CF->getValueAPF().toString(Str);
+              CS << Str;
+            } else {
+              CS << "?";
+            }
+          }
+          CS << ">";
+          OutStreamer.AddComment(CS.str());
+        }
+      }
+    break;
   }
 
   MCInst TmpInst;

test/CodeGen/X86/vector-shuffle-256-v16.ll

@@ -377,7 +377,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_0
 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI16_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -386,7 +386,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_0
 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI16_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -399,7 +399,7 @@ define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_1
 ; AVX1-LABEL: @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI17_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -408,7 +408,7 @@ define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_1
 ; AVX2-LABEL: @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI17_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -671,7 +671,7 @@ define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_2
 ; AVX1-LABEL: @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovdqa .LCPI31_0(%rip), %xmm3
+; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = [0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1]
 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
 ; AVX1-NEXT: vpshufd {{.*}} # xmm4 = xmm4[0,0,0,0]
@@ -685,7 +685,7 @@ define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_2
 ; AVX2-LABEL: @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vmovdqa .LCPI31_0(%rip), %xmm3
+; AVX2-NEXT: vmovdqa {{.*}} # xmm3 = [0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1]
 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
 ; AVX2-NEXT: vpshufd {{.*}} # xmm4 = xmm4[0,0,0,0]
@@ -793,7 +793,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_0
 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI35_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -802,7 +802,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_0
 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI35_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -815,7 +815,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_0
 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI36_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -824,7 +824,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_0
 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI36_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -837,7 +837,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_0
 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI37_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -846,7 +846,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_0
 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI37_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -859,7 +859,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_0
 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI38_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -868,7 +868,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_0
 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI38_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -881,7 +881,7 @@ define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_0
 ; AVX1-LABEL: @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI39_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -890,7 +890,7 @@ define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_0
 ; AVX2-LABEL: @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI39_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -903,7 +903,7 @@ define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_0
 ; AVX1-LABEL: @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI40_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -912,7 +912,7 @@ define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_0
 ; AVX2-LABEL: @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI40_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -925,7 +925,7 @@ define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_0
 ; AVX1-LABEL: @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI41_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -934,7 +934,7 @@ define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_0
 ; AVX2-LABEL: @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI41_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0

test/CodeGen/X86/vector-shuffle-256-v32.ll

@@ -771,7 +771,7 @@ define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_
 ; AVX1-LABEL: @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI33_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -780,7 +780,7 @@ define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_
 ; AVX2-LABEL: @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI33_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -793,7 +793,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_
 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI34_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -802,7 +802,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_
 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI34_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -815,7 +815,7 @@ define <32 x i8> @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_
 ; AVX1-LABEL: @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI35_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -824,7 +824,7 @@ define <32 x i8> @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_
 ; AVX2-LABEL: @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI35_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -837,7 +837,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_
 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI36_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -846,7 +846,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_
 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI36_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -859,7 +859,7 @@ define <32 x i8> @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_
 ; AVX1-LABEL: @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI37_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -868,7 +868,7 @@ define <32 x i8> @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_
 ; AVX2-LABEL: @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI37_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -881,7 +881,7 @@ define <32 x i8> @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_
 ; AVX1-LABEL: @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI38_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -890,7 +890,7 @@ define <32 x i8> @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_
 ; AVX2-LABEL: @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI38_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -903,7 +903,7 @@ define <32 x i8> @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_
 ; AVX1-LABEL: @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI39_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -912,7 +912,7 @@ define <32 x i8> @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_
 ; AVX2-LABEL: @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI39_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -1025,10 +1025,10 @@ define <32 x i8> @shuffle_v32i8_32_32_02_02_36_36_06_06_40_40_10_10_44_44_14_14_
 ; AVX1-LABEL: @shuffle_v32i8_32_32_02_02_36_36_06_06_40_40_10_10_44_44_14_14_48_48_18_18_52_52_22_22_56_56_26_26_60_60_30_30
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovdqa .LCPI46_0(%rip), %xmm3
+; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = [6,6,2,2,2,2,6,6,14,14,10,10,10,10,14,14]
 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vmovdqa .LCPI46_1(%rip), %xmm5
+; AVX1-NEXT: vmovdqa {{.*}} # xmm5 = [0,0,1,1,4,4,5,5,8,8,9,9,12,12,13,13]
 ; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0],xmm2[1],xmm4[2],xmm2[3],xmm4[4],xmm2[5],xmm4[6],xmm2[7]
 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
@@ -1040,10 +1040,10 @@ define <32 x i8> @shuffle_v32i8_32_32_02_02_36_36_06_06_40_40_10_10_44_44_14_14_
 ; AVX2-LABEL: @shuffle_v32i8_32_32_02_02_36_36_06_06_40_40_10_10_44_44_14_14_48_48_18_18_52_52_22_22_56_56_26_26_60_60_30_30
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vmovdqa .LCPI46_0(%rip), %xmm3
+; AVX2-NEXT: vmovdqa {{.*}} # xmm3 = [6,6,2,2,2,2,6,6,14,14,10,10,10,10,14,14]
 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
-; AVX2-NEXT: vmovdqa .LCPI46_1(%rip), %xmm5
+; AVX2-NEXT: vmovdqa {{.*}} # xmm5 = [0,0,1,1,4,4,5,5,8,8,9,9,12,12,13,13]
 ; AVX2-NEXT: vpshufb %xmm5, %xmm4, %xmm4
 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0],xmm2[1],xmm4[2],xmm2[3],xmm4[4],xmm2[5],xmm4[6],xmm2[7]
 ; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
@@ -1081,7 +1081,7 @@ define <32 x i8> @shuffle_v32i8_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_
 ; AVX1-LABEL: @shuffle_v32i8_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_16_16_48_48_16_16_48_48_16_16_48_48_16_16_48_48
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovdqa .LCPI48_0(%rip), %xmm3
+; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = [0,0,0,0,2,2,0,0,0,0,0,0,6,6,0,0]
 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -1097,7 +1097,7 @@ define <32 x i8> @shuffle_v32i8_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_
 ; AVX2-LABEL: @shuffle_v32i8_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_16_16_48_48_16_16_48_48_16_16_48_48_16_16_48_48
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vmovdqa .LCPI48_0(%rip), %xmm3
+; AVX2-NEXT: vmovdqa {{.*}} # xmm3 = [0,0,0,0,2,2,0,0,0,0,0,0,6,6,0,0]
 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -1117,7 +1117,7 @@ define <32 x i8> @shuffle_v32i8_32_32_32_32_32_32_32_32_08_08_10_10_12_12_14_14_
 ; AVX1-LABEL: @shuffle_v32i8_32_32_32_32_32_32_32_32_08_08_10_10_12_12_14_14_48_48_48_48_48_48_48_48_24_24_26_26_28_28_30_30
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovdqa .LCPI49_0(%rip), %xmm3
+; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = [8,8,10,10,10,10,11,11,8,8,10,10,12,12,14,14]
 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -1133,7 +1133,7 @@ define <32 x i8> @shuffle_v32i8_32_32_32_32_32_32_32_32_08_08_10_10_12_12_14_14_
 ; AVX2-LABEL: @shuffle_v32i8_32_32_32_32_32_32_32_32_08_08_10_10_12_12_14_14_48_48_48_48_48_48_48_48_24_24_26_26_28_28_30_30
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vmovdqa .LCPI49_0(%rip), %xmm3
+; AVX2-NEXT: vmovdqa {{.*}} # xmm3 = [8,8,10,10,10,10,11,11,8,8,10,10,12,12,14,14]
 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -1153,10 +1153,10 @@ define <32 x i8> @shuffle_v32i8_38_38_36_36_34_34_32_32_14_14_12_12_10_10_08_08_
 ; AVX1-LABEL: @shuffle_v32i8_38_38_36_36_34_34_32_32_14_14_12_12_10_10_08_08_54_54_52_52_50_50_48_48_30_30_28_28_26_26_24_24
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovdqa .LCPI50_0(%rip), %xmm3
+; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = [8,8,10,10,10,10,11,11,14,14,12,12,10,10,8,8]
 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vmovdqa .LCPI50_1(%rip), %xmm5
+; AVX1-NEXT: vmovdqa {{.*}} # xmm5 = [6,6,4,4,2,2,0,0,4,4,6,6,6,6,7,7]
 ; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
@@ -1168,10 +1168,10 @@ define <32 x i8> @shuffle_v32i8_38_38_36_36_34_34_32_32_14_14_12_12_10_10_08_08_
 ; AVX2-LABEL: @shuffle_v32i8_38_38_36_36_34_34_32_32_14_14_12_12_10_10_08_08_54_54_52_52_50_50_48_48_30_30_28_28_26_26_24_24
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vmovdqa .LCPI50_0(%rip), %xmm3
+; AVX2-NEXT: vmovdqa {{.*}} # xmm3 = [8,8,10,10,10,10,11,11,14,14,12,12,10,10,8,8]
 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
-; AVX2-NEXT: vmovdqa .LCPI50_1(%rip), %xmm5
+; AVX2-NEXT: vmovdqa {{.*}} # xmm5 = [6,6,4,4,2,2,0,0,4,4,6,6,6,6,7,7]
 ; AVX2-NEXT: vpshufb %xmm5, %xmm4, %xmm4
 ; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm4[0,1],xmm2[2,3]
 ; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
@@ -1187,10 +1187,10 @@ define <32 x i8> @shuffle_v32i8_38_38_36_36_34_34_32_32_06_06_04_04_02_02_00_00_
 ; AVX1-LABEL: @shuffle_v32i8_38_38_36_36_34_34_32_32_06_06_04_04_02_02_00_00_54_54_52_52_50_50_48_48_22_22_20_20_18_18_16_16
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovdqa .LCPI51_0(%rip), %xmm3
+; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = [0,0,2,2,2,2,3,3,6,6,4,4,2,2,0,0]
 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vmovdqa .LCPI51_1(%rip), %xmm5
+; AVX1-NEXT: vmovdqa {{.*}} # xmm5 = [6,6,4,4,2,2,0,0,4,4,6,6,6,6,7,7]
 ; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
@@ -1202,10 +1202,10 @@ define <32 x i8> @shuffle_v32i8_38_38_36_36_34_34_32_32_06_06_04_04_02_02_00_00_
 ; AVX2-LABEL: @shuffle_v32i8_38_38_36_36_34_34_32_32_06_06_04_04_02_02_00_00_54_54_52_52_50_50_48_48_22_22_20_20_18_18_16_16
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vmovdqa .LCPI51_0(%rip), %xmm3
+; AVX2-NEXT: vmovdqa {{.*}} # xmm3 = [0,0,2,2,2,2,3,3,6,6,4,4,2,2,0,0]
 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
-; AVX2-NEXT: vmovdqa .LCPI51_1(%rip), %xmm5
+; AVX2-NEXT: vmovdqa {{.*}} # xmm5 = [6,6,4,4,2,2,0,0,4,4,6,6,6,6,7,7]
 ; AVX2-NEXT: vpshufb %xmm5, %xmm4, %xmm4
 ; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm4[0,1],xmm2[2,3]
 ; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
@@ -1221,7 +1221,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_
 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI52_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -1230,7 +1230,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_
 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI52_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -1243,7 +1243,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_
 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI53_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -1252,7 +1252,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_
 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI53_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -1265,7 +1265,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_
 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI54_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -1274,7 +1274,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_
 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI54_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -1287,7 +1287,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_
 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI55_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -1296,7 +1296,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_
 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI55_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
@@ -1309,7 +1309,7 @@ define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
 ; AVX1-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa .LCPI56_0(%rip), %xmm2
+; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -1318,7 +1318,7 @@ define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
 ; AVX2-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa .LCPI56_0(%rip), %xmm2
+; AVX2-NEXT: vmovdqa {{.*}} # xmm2 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0

test/CodeGen/X86/vector-shuffle-256-v8.ll

@@ -575,7 +575,7 @@ define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_00000010
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI56_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,0,0,0,0,0,1,0]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
@@ -592,7 +592,7 @@ define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_00000200
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI57_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,0,0,0,0,2,0,0]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
@@ -609,7 +609,7 @@ define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_00003000
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI58_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,0,0,0,3,0,0,0]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
@@ -628,7 +628,7 @@ define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_00040000
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI59_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,0,0,4,0,0,0,0]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
@@ -647,7 +647,7 @@ define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_00500000
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI60_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,0,5,0,0,0,0,0]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -666,7 +666,7 @@ define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_06000000
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI61_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,6,0,0,0,0,0,0]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -720,7 +720,7 @@ define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_00112233
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI64_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,0,1,1,2,2,3,3]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
@@ -737,7 +737,7 @@ define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_00001111
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI65_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,0,0,0,1,1,1,1]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
@@ -888,9 +888,9 @@ define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_08192a3b
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI74_0(%rip), %ymm2
+; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = <u,0,u,1,u,2,u,3>
 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vmovdqa .LCPI74_1(%rip), %ymm2
+; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = <0,u,1,u,2,u,3,u>
 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
 ; AVX2-NEXT: retq
@@ -911,9 +911,9 @@ define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_08991abb
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI75_0(%rip), %ymm2
+; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = <0,u,u,u,1,u,u,u>
 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vmovdqa .LCPI75_1(%rip), %ymm2
+; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = <u,0,1,1,u,2,3,3>
 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
 ; AVX2-NEXT: retq
@@ -934,7 +934,7 @@ define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_091b2d3f
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI76_0(%rip), %ymm2
+; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = <0,u,1,u,2,u,3,u>
 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
 ; AVX2-NEXT: retq
@@ -954,7 +954,7 @@ define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_09ab1def
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI77_0(%rip), %ymm2
+; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = <0,u,u,u,1,u,u,u>
 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
 ; AVX2-NEXT: retq
@@ -1152,7 +1152,7 @@ define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_00015444
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI91_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,0,0,1,5,4,4,4]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
@@ -1167,7 +1167,7 @@ define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_00204644
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI92_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,0,2,0,4,6,4,4]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
@@ -1182,7 +1182,7 @@ define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_03004474
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI93_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,3,0,0,4,4,7,4]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
@@ -1197,7 +1197,7 @@ define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_10004444
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI94_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [1,0,0,0,4,4,4,4]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
@@ -1212,7 +1212,7 @@ define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_22006446
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI95_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [2,2,0,0,6,4,4,6]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
@@ -1227,7 +1227,7 @@ define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_33307474
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI96_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [3,3,3,0,7,4,7,4]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
@@ -1242,7 +1242,7 @@ define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_32104567
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI97_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [3,2,1,0,4,5,6,7]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
@@ -1257,7 +1257,7 @@ define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_00236744
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI98_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,0,2,3,6,7,4,4]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
@@ -1272,7 +1272,7 @@ define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_00226644
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI99_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,0,2,2,6,6,4,4]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
@@ -1287,7 +1287,7 @@ define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_10324567
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI100_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [1,0,3,2,4,5,6,7]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
@@ -1302,7 +1302,7 @@ define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_11334567
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI101_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [1,1,3,3,4,5,6,7]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -1317,7 +1317,7 @@ define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_01235467
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI102_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,1,2,3,5,4,6,7]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
@@ -1332,7 +1332,7 @@ define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_01235466
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI103_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [0,1,2,3,5,4,6,6]
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
@@ -1347,7 +1347,7 @@ define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_002u6u44
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI104_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = <0,0,2,u,6,u,4,4>
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
@@ -1362,7 +1362,7 @@ define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_00uu66uu
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI105_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = <0,0,u,u,6,6,u,u>
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
@@ -1377,7 +1377,7 @@ define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_103245uu
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI106_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = <1,0,3,2,4,5,u,u>
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
@@ -1392,7 +1392,7 @@ define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_1133uu67
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI107_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = <1,1,3,3,u,u,6,7>
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
@@ -1407,7 +1407,7 @@ define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_0uu354uu
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI108_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = <0,u,u,3,5,4,u,u>
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
@@ -1422,7 +1422,7 @@ define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX2-LABEL: @shuffle_v8i32_uuu3uu66
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa .LCPI109_0(%rip), %ymm1
+; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = <u,u,u,3,u,u,6,6>
 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>