From f2b19f9394413d2b04b2492f475e517dda61209a Mon Sep 17 00:00:00 2001 From: Pirama Arumuga Nainar Date: Thu, 20 Oct 2016 17:56:36 +0000 Subject: [PATCH] Fix *_EXTEND_VECTOR_INREG legalization Summary: While promoting *_EXTEND_VECTOR_INREG nodes whose inputs are already promoted, perform the appropriate extension (zero or sign) for the promoted node before doing the *_EXTEND_VECTOR_INREG operation. If not, the undefined high-order bits of the promoted operand may (a) be garbage (in case of zext) or (b) contribute the wrong sign-bit (in case of sext). Updated the promote-vec3.ll test after this change. The diff shows explicit zeroing in case of zext and intermediate sign extension in case of sext. Reviewers: RKSimon Subscribers: llvm-commits, srhines Differential Revision: https://reviews.llvm.org/D25790 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284752 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 22 ++++++++++++++++--- test/CodeGen/X86/promote-vec3.ll | 21 ++++++++++-------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 41ca0b462f6..0669536ec13 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -3349,11 +3349,27 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) { SDLoc dl(N); - // For operands whose TypeAction is to promote, the promoted node to construct - // a new *_EXTEND_VECTOR_INREG node. + // For operands whose TypeAction is to promote, extend the promoted node + // appropriately (ZERO_EXTEND or SIGN_EXTEND) from the original pre-promotion + // type, and then construct a new *_EXTEND_VECTOR_INREG node to the promote-to + // type.. 
if (getTypeAction(N->getOperand(0).getValueType()) == TargetLowering::TypePromoteInteger) { - SDValue Promoted = GetPromotedInteger(N->getOperand(0)); + SDValue Promoted; + + switch(N->getOpcode()) { + case ISD::SIGN_EXTEND_VECTOR_INREG: + Promoted = SExtPromotedInteger(N->getOperand(0)); + break; + case ISD::ZERO_EXTEND_VECTOR_INREG: + Promoted = ZExtPromotedInteger(N->getOperand(0)); + break; + case ISD::ANY_EXTEND_VECTOR_INREG: + Promoted = GetPromotedInteger(N->getOperand(0)); + break; + default: + llvm_unreachable("Node has unexpected Opcode"); + } return DAG.getNode(N->getOpcode(), dl, NVT, Promoted); } diff --git a/test/CodeGen/X86/promote-vec3.ll b/test/CodeGen/X86/promote-vec3.ll index 3d08f3fe3e5..7a496714622 100644 --- a/test/CodeGen/X86/promote-vec3.ll +++ b/test/CodeGen/X86/promote-vec3.ll @@ -9,16 +9,17 @@ define <3 x i16> @zext_i8(<3 x i8>) { ; SSE3-LABEL: zext_i8: ; SSE3: # BB#0: ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: pinsrw $0, %eax, %xmm0 -; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: pinsrw $1, %eax, %xmm0 -; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: pinsrw $2, %eax, %xmm0 +; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: pxor %xmm1, %xmm1 -; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE3-NEXT: movd %xmm0, %eax -; SSE3-NEXT: pextrw $2, %xmm0, %edx -; SSE3-NEXT: pextrw $4, %xmm0, %ecx +; SSE3-NEXT: pinsrw $0, %eax, %xmm1 +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: pinsrw $1, %eax, %xmm1 +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: pinsrw $2, %eax, %xmm1 +; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE3-NEXT: movd %xmm1, %eax +; SSE3-NEXT: pextrw $2, %xmm1, %edx +; SSE3-NEXT: pextrw $4, %xmm1, %ecx ; SSE3-NEXT: # kill: %AX %AX %EAX ; SSE3-NEXT: # kill: %DX %DX %EDX ; SSE3-NEXT: # kill: %CX %CX %ECX @@ -78,6 +79,8 @@ define <3 x i16> @sext_i8(<3 x i8>) { ; 
SSE3-NEXT: pinsrw $1, %eax, %xmm0 ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; SSE3-NEXT: pinsrw $2, %eax, %xmm0 +; SSE3-NEXT: psllw $8, %xmm0 +; SSE3-NEXT: psraw $8, %xmm0 ; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE3-NEXT: psrad $16, %xmm0 ; SSE3-NEXT: movd %xmm0, %eax