From 6a9a49d7ab6f7f4a8ac27c660b7e98655741988b Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Fri, 5 Dec 2014 20:02:22 +0000 Subject: [PATCH] [X86] Improved lowering of packed vector shifts to vpsllq/vpsrlq. SSE2/AVX non-constant packed shift instructions only use the lower 64 bits of the shift count. This patch teaches function 'getTargetVShiftNode' how to deal with shifts where the shift count node is of type MVT::i64. Before this patch, function 'getTargetVShiftNode' only knew how to deal with shift count nodes of type MVT::i32. This forced the backend to wrongly truncate the shift count to MVT::i32, and then zero-extend it back to MVT::i64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223505 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 27 +++++++++++++++++---------- test/CodeGen/X86/lower-vec-shift-2.ll | 8 -------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 62af6422909..6d5a10feab1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16713,7 +16713,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, SDValue ShAmt, SelectionDAG &DAG) { - assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32"); + MVT SVT = ShAmt.getSimpleValueType(); + assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!"); // Catch shift-by-constant. 
if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt)) @@ -16728,13 +16729,18 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, case X86ISD::VSRAI: Opc = X86ISD::VSRA; break; } - // Need to build a vector containing shift amount - // Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0 - SDValue ShOps[4]; - ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, MVT::i32); - ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, ShOps); + // Need to build a vector containing shift amount. + // SSE/AVX packed shifts only use the lower 64-bit of the shift count. + SmallVector<SDValue, 4> ShOps; + ShOps.push_back(ShAmt); + if (SVT == MVT::i32) { + ShOps.push_back(DAG.getConstant(0, SVT)); + ShOps.push_back(DAG.getUNDEF(SVT)); + } + ShOps.push_back(DAG.getUNDEF(SVT)); + + MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64; + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, BVT, ShOps); // The return type has to be a 128-bit type with the same element // type as the input type. 
@@ -18469,8 +18475,9 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, } if (BaseShAmt.getNode()) { - if (EltVT.bitsGT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt); + assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!"); + if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt); else if (EltVT.bitsLT(MVT::i32)) BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); diff --git a/test/CodeGen/X86/lower-vec-shift-2.ll b/test/CodeGen/X86/lower-vec-shift-2.ll index 46023e49965..90505b6dd8f 100644 --- a/test/CodeGen/X86/lower-vec-shift-2.ll +++ b/test/CodeGen/X86/lower-vec-shift-2.ll @@ -44,14 +44,10 @@ entry: define <2 x i64> @test3(<2 x i64> %A, <2 x i64> %B) { ; SSE2-LABEL: test3: ; SSE2: # BB#0 -; SSE2-NEXT: movd %xmm1, %rax -; SSE2-NEXT: movd %eax, %xmm1 ; SSE2-NEXT: psllq %xmm1, %xmm0 ; SSE2-NEXT: retq ; AVX-LABEL: test3: ; AVX: # BB#0 -; AVX-NEXT: vmovq %xmm1, %rax -; AVX-NEXT: vmovd %eax, %xmm1 ; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq entry: @@ -103,14 +99,10 @@ entry: define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) { ; SSE2-LABEL: test6: ; SSE2: # BB#0 -; SSE2-NEXT: movd %xmm1, %rax -; SSE2-NEXT: movd %eax, %xmm1 ; SSE2-NEXT: psrlq %xmm1, %xmm0 ; SSE2-NEXT: retq ; AVX-LABEL: test6: ; AVX: # BB#0 -; AVX-NEXT: vmovq %xmm1, %rax -; AVX-NEXT: vmovd %eax, %xmm1 ; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq entry: