From 29ec93c7b6a00d8797dfcd3b0021e705df7ba045 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 9 Mar 2017 14:06:39 +0000 Subject: [PATCH] [X86][SSE] Speed up constant pool shuffle mask decoding with direct copy (PR32037). If the constants are already the correct size, we can copy them directly into the shuffle mask. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297381 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/X86ShuffleDecodeConstantPool.cpp | 34 +++++++++++++++---- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp index df6ddafa717..9190b66367c 100644 --- a/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp +++ b/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp @@ -49,6 +49,33 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits, unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); unsigned NumCstElts = CstTy->getVectorNumElements(); + assert((CstSizeInBits % MaskEltSizeInBits) == 0 && + "Unaligned shuffle mask size"); + + unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits; + UndefElts = APInt(NumMaskElts, 0); + RawMask.resize(NumMaskElts, 0); + + // Fast path - if the constants match the mask size then copy direct. + if (MaskEltSizeInBits == CstEltSizeInBits) { + assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size"); + for (unsigned i = 0; i != NumMaskElts; ++i) { + Constant *COp = C->getAggregateElement(i); + if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) + return false; + + if (isa<UndefValue>(COp)) { + UndefElts.setBit(i); + RawMask[i] = 0; + continue; + } + + auto *Elt = cast<ConstantInt>(COp); + RawMask[i] = Elt->getValue().getZExtValue(); + } + return true; + } + // Extract all the undef/constant element data and pack into single bitsets. 
APInt UndefBits(CstSizeInBits, 0); APInt MaskBits(CstSizeInBits, 0); @@ -69,13 +96,6 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits, } // Now extract the undef/constant bit data into the raw shuffle masks. - assert((CstSizeInBits % MaskEltSizeInBits) == 0 && - "Unaligned shuffle mask size"); - - unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits; - UndefElts = APInt(NumMaskElts, 0); - RawMask.resize(NumMaskElts, 0); - for (unsigned i = 0; i != NumMaskElts; ++i) { unsigned BitOffset = i * MaskEltSizeInBits; APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);