diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d74764714f4..694623e1754 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1150,9 +1150,6 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
     setOperationAction(ISD::FABS, MVT::v4f64, Custom);
 
-    setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
-    setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
-
     setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
     setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
 
@@ -1160,8 +1157,6 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
     setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
 
-    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
-    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
     setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
     setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
 
@@ -1194,10 +1189,16 @@ void X86TargetLowering::resetOperationActions() {
 
     setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
     setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
     setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
     setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
     setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
     setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
 
     if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
       setOperationAction(ISD::FMA, MVT::v8f32, Legal);
@@ -10391,7 +10392,8 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
     return LowerSIGN_EXTEND_AVX512(Op, DAG);
 
   if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
-      (VT != MVT::v8i32 || InVT != MVT::v8i16))
+      (VT != MVT::v8i32 || InVT != MVT::v8i16) &&
+      (VT != MVT::v16i16 || InVT != MVT::v16i8))
     return SDValue();
 
   if (Subtarget->hasInt256())
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 004710b67cc..7cae485b240 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5602,16 +5602,19 @@ let Predicates = [HasAVX2] in {
   def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
 
   def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
+  def : Pat<(v16i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
 }
 
 let Predicates = [HasAVX] in {
   def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
 
   def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+  def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
 }
 
 let Predicates = [UseSSE41] in {
   def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
 
   def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
+  def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
 }
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 36bfeb10aa2..f88a666092b 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -401,12 +401,15 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
 
   static const TypeConversionCostTblEntry AVXConversionTbl[] = {
+    { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
+    { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
     { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
     { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
     { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
     { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
     { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
     { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
+    { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 },
 
     { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
     { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index b69b3bf6304..f3c1283c7e3 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -38,6 +38,10 @@ define i32 @zext_sext(<8 x i1> %in) {
   ;CHECK: cost of 9 {{.*}} sext
   %S = sext <8 x i1> %in to <8 x i32>
 
+  ;CHECK: cost of 1 {{.*}} zext
+  %A1 = zext <16 x i8> undef to <16 x i16>
+  ;CHECK: cost of 1 {{.*}} sext
+  %A2 = sext <16 x i8> undef to <16 x i16>
   ;CHECK: cost of 1 {{.*}} sext
   %A = sext <8 x i16> undef to <8 x i32>
   ;CHECK: cost of 1 {{.*}} zext
@@ -51,11 +55,13 @@ define i32 @zext_sext(<8 x i1> %in) {
   ;CHECK: cost of 1 {{.*}} zext
   %D = zext <4 x i32> undef to <4 x i64>
 
-  ;CHECK: cost of 1 {{.*}} trunc 
+  ;CHECK: cost of 1 {{.*}} trunc
   %E = trunc <4 x i64> undef to <4 x i32>
 
   ;CHECK: cost of 1 {{.*}} trunc
   %F = trunc <8 x i32> undef to <8 x i16>
+  ;CHECK: cost of 2 {{.*}} trunc
+  %F1 = trunc <16 x i16> undef to <16 x i8>
 
   ;CHECK: cost of 3 {{.*}} trunc
   %G = trunc <8 x i64> undef to <8 x i32>
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
index b9c70005100..fb2287f5289 100644
--- a/test/CodeGen/X86/avx-sext.ll
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -154,6 +154,17 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
   ret <4 x i64> %extmask
 }
 
+; AVX-LABEL: sext_16i8_to_16i16
+; AVX: vpmovsxbw
+; AVX: vmovhlps
+; AVX: vpmovsxbw
+; AVX: ret
+define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
+ %X = load <16 x i8>* %ptr
+ %Y = sext <16 x i8> %X to <16 x i16>
+ ret <16 x i16> %Y
+}
+
 ; AVX: sext_4i8_to_4i64
 ; AVX: vpslld $24
 ; AVX: vpsrad $24
diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll
index d0077366444..58d0a356909 100644
--- a/test/CodeGen/X86/avx-trunc.ll
+++ b/test/CodeGen/X86/avx-trunc.ll
@@ -12,4 +12,9 @@ define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp{
   %B = trunc <8 x i32> %A to <8 x i16>
   ret <8 x i16>%B
 }
-
+define <16 x i8> @trunc_16_8(<16 x i16> %A) nounwind uwtable readnone ssp{
+; CHECK-LABEL: trunc_16_8
+; CHECK: pshufb
+  %B = trunc <16 x i16> %A to <16 x i8>
+  ret <16 x i8> %B
+}
diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll
index 0143f18fe24..f49718e4c8b 100644
--- a/test/CodeGen/X86/avx2-conversions.ll
+++ b/test/CodeGen/X86/avx2-conversions.ll
@@ -72,6 +72,25 @@ define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
   ret <16 x i16> %t
 }
 
+; CHECK-LABEL: sext_16i8_16i16:
+; CHECK: vpmovsxbw
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
+  %t = sext <16 x i8> %z to <16 x i16>
+  ret <16 x i16> %t
+}
+
+; CHECK-LABEL: trunc_16i16_16i8:
+; CHECK: vpshufb
+; CHECK: vpshufb
+; CHECK: vpor
+; CHECK: ret
+define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
+  %t = trunc <16 x i16> %z to <16 x i8>
+  ret <16 x i8> %t
+}
+
 ; CHECK: load_sext_test1
 ; CHECK: vpmovsxdq (%r{{[^,]*}}), %ymm{{.*}}
 ; CHECK: ret
diff --git a/test/CodeGen/X86/pmovsx-inreg.ll b/test/CodeGen/X86/pmovsx-inreg.ll
index d30d7d0694a..07979f61ddd 100644
--- a/test/CodeGen/X86/pmovsx-inreg.ll
+++ b/test/CodeGen/X86/pmovsx-inreg.ll
@@ -86,8 +86,7 @@ define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
   ret void
 
 ; AVX2-LABEL: test6:
-; FIXME: v16i8 -> v16i16 is scalarized.
-; AVX2-NOT: pmovsx
+; AVX2: vpmovsxbw
 }
 
 define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
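
Reviewer note, illustrative only and not part of the patch: the cost of 2 for the v16i16 -> v16i8 truncate in the AVX table reflects that there is no single 256-bit byte shuffle for it; the value is narrowed one 128-bit half at a time and the halves are then recombined, which is also why the new trunc_16i16_16i8 test above expects two vpshufb plus a vpor. A hand-written LLVM IR sketch of that decomposition follows; the function name is invented for illustration, and the real lowering happens on SelectionDAG nodes rather than in IR.

; Illustrative decomposition of trunc <16 x i16> -> <16 x i8>, assuming no
; single 256-bit byte shuffle is available: split into 128-bit halves,
; narrow each half, then concatenate the two <8 x i8> results.
define <16 x i8> @trunc_16i16_16i8_by_hand(<16 x i16> %v) {
  ; low eight i16 elements
  %lo = shufflevector <16 x i16> %v, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; high eight i16 elements
  %hi = shufflevector <16 x i16> %v, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ; narrow each half (roughly one byte shuffle per 128-bit lane)
  %lo8 = trunc <8 x i16> %lo to <8 x i8>
  %hi8 = trunc <8 x i16> %hi to <8 x i8>
  ; concatenate the halves back into a 128-bit result
  %r = shufflevector <8 x i8> %lo8, <8 x i8> %hi8, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %r
}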