mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 21:00:29 +00:00
SDAG: Legalize vector BSWAP into a shuffle if the shuffle is legal but the bswap is not.
- On ARM/ARM64 we get a vrev because the shuffle matching code is really smart. We still unroll anything that's not v4i32 though.
- On X86 we get a pshufb with SSSE3. This required more cleverness in isShuffleMaskLegal.
- On PPC we get a vperm for v8i16 and v4i32. v2i64 is unrolled.

llvm-svn: 209123
This commit is contained in:
parent
ecbf9efc4d
commit
600e24a1cb
@ -63,6 +63,8 @@ class VectorLegalizer {
|
||||
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
|
||||
// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
|
||||
SDValue ExpandSEXTINREG(SDValue Op);
|
||||
// Expand bswap of vectors into a shuffle if legal.
|
||||
SDValue ExpandBSWAP(SDValue Op);
|
||||
// Implement vselect in terms of XOR, AND, OR when blend is not supported
|
||||
// by the target.
|
||||
SDValue ExpandVSELECT(SDValue Op);
|
||||
@ -297,6 +299,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
||||
case TargetLowering::Expand:
|
||||
if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
|
||||
Result = ExpandSEXTINREG(Op);
|
||||
else if (Node->getOpcode() == ISD::BSWAP)
|
||||
Result = ExpandBSWAP(Op);
|
||||
else if (Node->getOpcode() == ISD::VSELECT)
|
||||
Result = ExpandVSELECT(Op);
|
||||
else if (Node->getOpcode() == ISD::SELECT)
|
||||
@ -682,6 +686,29 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
|
||||
return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
|
||||
}
|
||||
|
||||
/// Expand a vector BSWAP node.
///
/// The input is reinterpreted as a vector of i8 and permuted with a shuffle
/// whose mask lists, for every original element, that element's byte indices
/// in descending order. If the target does not report that mask as legal for
/// the i8 vector type, the operation is unrolled instead.
///
/// \param Op the BSWAP node to expand; its operand 0 is the vector to swap.
/// \returns a value of Op's type: either the bitcast shuffle result or the
///          result of DAG.UnrollVectorOp.
SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
  EVT VT = Op.getValueType();
  int BytesPerElt = VT.getScalarSizeInBits() / 8;
  int NumElts = VT.getVectorNumElements();

  // One mask entry per byte of the vector: for element Elt, emit the indices
  // Base + BytesPerElt - 1 down to Base.
  SmallVector<int, 16> ShuffleMask;
  for (int Elt = 0; Elt != NumElts; ++Elt) {
    int Base = Elt * BytesPerElt;
    for (int Byte = BytesPerElt; Byte-- > 0;)
      ShuffleMask.push_back(Base + Byte);
  }

  // The i8 vector type has exactly as many lanes as the mask has entries.
  EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());

  if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
    // bitcast to bytes -> shuffle against undef -> bitcast back.
    SDLoc DL(Op);
    SDValue Bytes = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
    SDValue Swapped = DAG.getVectorShuffle(ByteVT, DL, Bytes,
                                           DAG.getUNDEF(ByteVT),
                                           ShuffleMask.data());
    return DAG.getNode(ISD::BITCAST, DL, VT, Swapped);
  }

  // The shuffle mask is not legal for this target: fall back to unrolling.
  return DAG.UnrollVectorOp(Op.getNode());
}
|
||||
|
||||
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
|
||||
// Implement VSELECT in terms of XOR, AND, OR
|
||||
// on platforms which do not support blend natively.
|
||||
|
@ -520,6 +520,8 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
|
||||
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
|
||||
setOperationAction(ISD::MULHU, VT, Expand);
|
||||
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
|
||||
|
||||
setOperationAction(ISD::BSWAP, VT, Expand);
|
||||
}
|
||||
|
||||
// There is no v1i64/v2i64 multiply, expand v1i64/v2i64 to GPR i64 multiply.
|
||||
|
@ -414,6 +414,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||
setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
|
||||
setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
|
||||
setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
|
||||
|
||||
setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
|
||||
}
|
||||
|
||||
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
|
||||
|
@ -450,6 +450,8 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
|
||||
setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
|
||||
setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
|
||||
|
||||
setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
|
||||
|
||||
for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
|
||||
InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
|
||||
setTruncStoreAction((MVT::SimpleValueType)VT,
|
||||
|
@ -460,6 +460,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
|
||||
setOperationAction(ISD::SDIVREM, VT, Expand);
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
|
||||
setOperationAction(ISD::FPOW, VT, Expand);
|
||||
setOperationAction(ISD::BSWAP, VT, Expand);
|
||||
setOperationAction(ISD::CTPOP, VT, Expand);
|
||||
setOperationAction(ISD::CTLZ, VT, Expand);
|
||||
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
|
||||
|
@ -15116,7 +15116,23 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
|
||||
if (VT.getSizeInBits() == 64)
|
||||
return false;
|
||||
|
||||
// FIXME: pshufb, blends, shifts.
|
||||
// If this is a single-input shuffle with no 128 bit lane crossings we can
|
||||
// lower it into pshufb.
|
||||
if ((SVT.is128BitVector() && Subtarget->hasSSSE3()) ||
|
||||
(SVT.is256BitVector() && Subtarget->hasInt256())) {
|
||||
bool isLegal = true;
|
||||
for (unsigned I = 0, E = M.size(); I != E; ++I) {
|
||||
if (M[I] >= (int)SVT.getVectorNumElements() ||
|
||||
ShuffleCrosses128bitLane(SVT, I, M[I])) {
|
||||
isLegal = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isLegal)
|
||||
return true;
|
||||
}
|
||||
|
||||
// FIXME: blends, shifts.
|
||||
return (SVT.getVectorNumElements() == 2 ||
|
||||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
|
||||
isMOVLMask(M, SVT) ||
|
||||
|
@ -178,3 +178,11 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Byte-swaps a <4 x i32> vector through the llvm.bswap.v4i32 intrinsic.
; The generated code is expected to contain a vrev32.8 instruction.
define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
|
||||
; CHECK-LABEL: test_vrev32_bswap:
|
||||
; CHECK: vrev32.8
|
||||
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
|
||||
ret <4 x i32> %bswap
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
|
||||
|
@ -222,3 +222,14 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; Byte-swaps a <4 x i32> vector through the llvm.bswap.v4i32 intrinsic.
; The generated code is expected to contain rev32.16b, and nothing else that
; matches "rev" may appear between that instruction and the ret.
define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
|
||||
; CHECK-LABEL: test_vrev32_bswap:
|
||||
; CHECK: rev32.16b
|
||||
; CHECK-NOT: rev
|
||||
; CHECK: ret
|
||||
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
|
||||
ret <4 x i32> %bswap
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
|
||||
|
@ -1,19 +1,127 @@
|
||||
; RUN: llc < %s -mcpu=x86_64 | FileCheck %s
|
||||
; RUN: llc < %s -mcpu=x86-64 | FileCheck %s -check-prefix=CHECK-NOSSSE3
|
||||
; RUN: llc < %s -mcpu=core2 | FileCheck %s -check-prefix=CHECK-SSSE3
|
||||
; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK-AVX2
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
|
||||
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
|
||||
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
||||
|
||||
define <2 x i64> @foo(<2 x i64> %v) #0 {
|
||||
; Byte-swaps each i16 lane of a <8 x i16> vector with llvm.bswap.v8i16.
; Expected output per RUN configuration:
;   -mcpu=x86-64    eight rolw instructions, then retq
;   -mcpu=core2     a pshufb immediately followed by retq
;   -mcpu=core-avx2 a vpshufb immediately followed by retq
define <8 x i16> @test1(<8 x i16> %v) #0 {
|
||||
entry:
|
||||
%r = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
|
||||
ret <8 x i16> %r
|
||||
|
||||
; CHECK-NOSSSE3-LABEL: @test1
|
||||
; CHECK-NOSSSE3: rolw
|
||||
; CHECK-NOSSSE3: rolw
|
||||
; CHECK-NOSSSE3: rolw
|
||||
; CHECK-NOSSSE3: rolw
|
||||
; CHECK-NOSSSE3: rolw
|
||||
; CHECK-NOSSSE3: rolw
|
||||
; CHECK-NOSSSE3: rolw
|
||||
; CHECK-NOSSSE3: rolw
|
||||
; CHECK-NOSSSE3: retq
|
||||
|
||||
; CHECK-SSSE3-LABEL: @test1
|
||||
; CHECK-SSSE3: pshufb
|
||||
; CHECK-SSSE3-NEXT: retq
|
||||
|
||||
; CHECK-AVX2-LABEL: @test1
|
||||
; CHECK-AVX2: vpshufb
|
||||
; CHECK-AVX2-NEXT: retq
|
||||
}
|
||||
|
||||
; Byte-swaps each i32 lane of a <4 x i32> vector with llvm.bswap.v4i32.
; Expected output per RUN configuration:
;   -mcpu=x86-64    four bswapl instructions, then retq
;   -mcpu=core2     a pshufb immediately followed by retq
;   -mcpu=core-avx2 a vpshufb immediately followed by retq
define <4 x i32> @test2(<4 x i32> %v) #0 {
|
||||
entry:
|
||||
%r = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v)
|
||||
ret <4 x i32> %r
|
||||
|
||||
; CHECK-NOSSSE3-LABEL: @test2
|
||||
; CHECK-NOSSSE3: bswapl
|
||||
; CHECK-NOSSSE3: bswapl
|
||||
; CHECK-NOSSSE3: bswapl
|
||||
; CHECK-NOSSSE3: bswapl
|
||||
; CHECK-NOSSSE3: retq
|
||||
|
||||
; CHECK-SSSE3-LABEL: @test2
|
||||
; CHECK-SSSE3: pshufb
|
||||
; CHECK-SSSE3-NEXT: retq
|
||||
|
||||
; CHECK-AVX2-LABEL: @test2
|
||||
; CHECK-AVX2: vpshufb
|
||||
; CHECK-AVX2-NEXT: retq
|
||||
}
|
||||
|
||||
; Byte-swaps each i64 lane of a <2 x i64> vector with llvm.bswap.v2i64.
; Expected output per RUN configuration:
;   -mcpu=x86-64    two bswapq instructions, then retq
;   -mcpu=core2     a pshufb immediately followed by retq
;   -mcpu=core-avx2 a vpshufb immediately followed by retq
define <2 x i64> @test3(<2 x i64> %v) #0 {
|
||||
entry:
|
||||
%r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
|
||||
ret <2 x i64> %r
|
||||
|
||||
; CHECK-NOSSSE3-LABEL: @test3
|
||||
; CHECK-NOSSSE3: bswapq
|
||||
; CHECK-NOSSSE3: bswapq
|
||||
; CHECK-NOSSSE3: retq
|
||||
|
||||
; CHECK-SSSE3-LABEL: @test3
|
||||
; CHECK-SSSE3: pshufb
|
||||
; CHECK-SSSE3-NEXT: retq
|
||||
|
||||
; CHECK-AVX2-LABEL: @test3
|
||||
; CHECK-AVX2: vpshufb
|
||||
; CHECK-AVX2-NEXT: retq
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
|
||||
declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
|
||||
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
|
||||
|
||||
; Byte-swaps each i16 lane of a <16 x i16> vector with llvm.bswap.v16i16.
; Expected output per RUN configuration (nothing is checked for -mcpu=x86-64):
;   -mcpu=core2     two pshufb instructions, the second immediately followed
;                   by retq
;   -mcpu=core-avx2 a vpshufb immediately followed by retq
define <16 x i16> @test4(<16 x i16> %v) #0 {
|
||||
entry:
|
||||
%r = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v)
|
||||
ret <16 x i16> %r
|
||||
|
||||
; CHECK-SSSE3-LABEL: @test4
|
||||
; CHECK-SSSE3: pshufb
|
||||
; CHECK-SSSE3: pshufb
|
||||
; CHECK-SSSE3-NEXT: retq
|
||||
|
||||
; CHECK-AVX2-LABEL: @test4
|
||||
; CHECK-AVX2: vpshufb
|
||||
; CHECK-AVX2-NEXT: retq
|
||||
}
|
||||
|
||||
; Byte-swaps each i32 lane of a <8 x i32> vector with llvm.bswap.v8i32.
; Expected output per RUN configuration (nothing is checked for -mcpu=x86-64):
;   -mcpu=core2     two pshufb instructions, the second immediately followed
;                   by retq
;   -mcpu=core-avx2 a vpshufb immediately followed by retq
define <8 x i32> @test5(<8 x i32> %v) #0 {
|
||||
entry:
|
||||
%r = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v)
|
||||
ret <8 x i32> %r
|
||||
|
||||
; CHECK-SSSE3-LABEL: @test5
|
||||
; CHECK-SSSE3: pshufb
|
||||
; CHECK-SSSE3: pshufb
|
||||
; CHECK-SSSE3-NEXT: retq
|
||||
|
||||
; CHECK-AVX2-LABEL: @test5
|
||||
; CHECK-AVX2: vpshufb
|
||||
; CHECK-AVX2-NEXT: retq
|
||||
}
|
||||
|
||||
; Byte-swaps each i64 lane of a <4 x i64> vector with llvm.bswap.v4i64.
; Expected output per RUN configuration (nothing is checked for -mcpu=x86-64):
;   -mcpu=core2     two pshufb instructions, the second immediately followed
;                   by retq
;   -mcpu=core-avx2 a vpshufb immediately followed by retq
define <4 x i64> @test6(<4 x i64> %v) #0 {
|
||||
entry:
|
||||
%r = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v)
|
||||
ret <4 x i64> %r
|
||||
|
||||
; CHECK-SSSE3-LABEL: @test6
|
||||
; CHECK-SSSE3: pshufb
|
||||
; CHECK-SSSE3: pshufb
|
||||
; CHECK-SSSE3-NEXT: retq
|
||||
|
||||
; CHECK-AVX2-LABEL: @test6
|
||||
; CHECK-AVX2: vpshufb
|
||||
; CHECK-AVX2-NEXT: retq
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK: bswapq
|
||||
; CHECK: bswapq
|
||||
; CHECK: retq
|
||||
|
||||
attributes #0 = { nounwind uwtable }
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user