mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-09 13:41:47 +00:00
[X86] Improved lowering of packed v8i16 vector shifts by non-constant count.
Before this patch, the backend sub-optimally expanded the non-constant shift
count of a v8i16 shift into a sequence of two 'movd' plus one 'movzwl'.

With this patch, the backend checks whether the target supports SSE4.1. If so,
it lets the shuffle legalizer deal with the expansion of the shift amount.

Example:
;;
define <8 x i16> @test(<8 x i16> %A, <8 x i16> %B) {
  %shamt = shufflevector <8 x i16> %B, <8 x i16> undef, <8 x i32> zeroinitializer
  %shl = shl <8 x i16> %A, %shamt
  ret <8 x i16> %shl
}
;;

Before (with -mattr=+avx):
  vmovd %xmm1, %eax
  movzwl %ax, %eax
  vmovd %eax, %xmm1
  vpsllw %xmm1, %xmm0, %xmm0
  retq

Now:
  vpxor %xmm2, %xmm2, %xmm2
  vpblendw $1, %xmm1, %xmm2, %xmm1
  vpsllw %xmm1, %xmm0, %xmm0
  retq

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223660 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
968f0454b8
commit
ae16ff1c42
@ -16720,6 +16720,15 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
|
||||
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
|
||||
}
|
||||
|
||||
const X86Subtarget &Subtarget =
|
||||
DAG.getTarget().getSubtarget<X86Subtarget>();
|
||||
if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
|
||||
ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
|
||||
// Let the shuffle legalizer expand this shift amount node.
|
||||
SDValue Op0 = ShAmt.getOperand(0);
|
||||
Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0);
|
||||
ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, &Subtarget, DAG);
|
||||
} else {
|
||||
// Need to build a vector containing shift amount.
|
||||
// SSE/AVX packed shifts only use the lower 64-bit of the shift count.
|
||||
SmallVector<SDValue, 4> ShOps;
|
||||
@ -16732,6 +16741,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
|
||||
|
||||
MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
|
||||
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, BVT, ShOps);
|
||||
}
|
||||
|
||||
// The return type has to be a 128-bit type with the same element
|
||||
// type as the input type.
|
||||
|
@ -11,9 +11,8 @@ define <8 x i16> @test1(<8 x i16> %A, <8 x i16> %B) {
|
||||
; SSE2-NEXT: retq
|
||||
; AVX-LABEL: test1:
|
||||
; AVX: # BB#0
|
||||
; AVX-NEXT: vmovd %xmm1, %eax
|
||||
; AVX-NEXT: movzwl %ax, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm1
|
||||
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
|
||||
; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
@ -66,9 +65,8 @@ define <8 x i16> @test4(<8 x i16> %A, <8 x i16> %B) {
|
||||
; SSE2-NEXT: retq
|
||||
; AVX-LABEL: test4:
|
||||
; AVX: # BB#0
|
||||
; AVX-NEXT: vmovd %xmm1, %eax
|
||||
; AVX-NEXT: movzwl %ax, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm1
|
||||
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
|
||||
; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
@ -121,9 +119,8 @@ define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) {
|
||||
; SSE2-NEXT: retq
|
||||
; AVX-LABEL: test7:
|
||||
; AVX: # BB#0
|
||||
; AVX-NEXT: vmovd %xmm1, %eax
|
||||
; AVX-NEXT: movzwl %ax, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm1
|
||||
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
|
||||
; AVX-NEXT: vpsraw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
|
Loading…
Reference in New Issue
Block a user