From 2266c1322bfcebc4a4d2e73cc134070e7e9159b1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 31 Dec 2016 00:45:06 +0000 Subject: [PATCH] [InstCombine][AVX-512] Teach InstCombine that llvm.x86.avx512.vcomi.sd and llvm.x86.avx512.vcomi.ss don't use the upper elements of their input. This was already done for the SSE/SSE2 version of the intrinsics. llvm-svn: 290776 --- .../InstCombine/InstCombineCalls.cpp | 2 + test/Transforms/InstCombine/x86-avx512.ll | 38 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 25f692c6fb9..92369bd70b1 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1775,6 +1775,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_ucomile_sd: case Intrinsic::x86_sse2_ucomilt_sd: case Intrinsic::x86_sse2_ucomineq_sd: + case Intrinsic::x86_avx512_vcomi_ss: + case Intrinsic::x86_avx512_vcomi_sd: case Intrinsic::x86_avx512_mask_cmp_ss: case Intrinsic::x86_avx512_mask_cmp_sd: { // These intrinsics only demand the 0th element of their input vectors. If diff --git a/test/Transforms/InstCombine/x86-avx512.ll b/test/Transforms/InstCombine/x86-avx512.ll index 2433a9de41b..d2a2580d8c2 100644 --- a/test/Transforms/InstCombine/x86-avx512.ll +++ b/test/Transforms/InstCombine/x86-avx512.ll @@ -2753,3 +2753,41 @@ define <8 x double> @test_div_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 %1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8) ret <8 x double> %1 } + +declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32) + +define i32 @test_comi_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_comi_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i32 0, i32 4) +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %4, <4 x float> %8, i32 0, i32 4) + ret i32 %9 +} + +declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32) + +define i32 @test_comi_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_comi_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i32 0, i32 4) +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %2, <2 x double> %4, i32 0, i32 4) + ret i32 %5 +}