From 23e748045333b05bc5e33611010b58ed8fecd17c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 22 Sep 2019 19:06:13 +0000 Subject: [PATCH] [X86] Update commutable EVEX vcmp patterns to use timm instead of imm. We need to match TargetConstant, not Constant. This was broken in r372338, but we lacked test coverage. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@372523 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 12 ++++---- test/CodeGen/X86/avx512-vec-cmp.ll | 46 ++++++++++------------------ test/CodeGen/X86/commute-fcmp.ll | 48 ++++++++++-------------------- 3 files changed, 37 insertions(+), 69 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index cb0d009234b..3558e4d87d3 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2560,26 +2560,26 @@ multiclass avx512_vcmp_common(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, (X86cmpm_imm_commute imm:$cc))>; def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2), (_.VT _.RC:$src1), - imm:$cc)), + timm:$cc)), (!cast(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, (X86cmpm_imm_commute imm:$cc))>; def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)), - (_.VT _.RC:$src1), imm:$cc), + (_.VT _.RC:$src1), timm:$cc), (!cast(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, (X86cmpm_imm_commute imm:$cc))>; def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast (_.ScalarLdFrag addr:$src2)), (_.VT _.RC:$src1), - imm:$cc)), + timm:$cc)), (!cast(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, (X86cmpm_imm_commute imm:$cc))>; @@ -2618,11 +2618,11 @@ defm VCMPPS : avx512_vcmp, // Patterns to select fp compares with load as first operand. let Predicates = [HasAVX512] in { def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, - imm:$cc)), + timm:$cc)), (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute imm:$cc))>; def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, - imm:$cc)), + timm:$cc)), (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute imm:$cc))>; } diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index a4f2fe5ca15..17a36c122f2 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -628,8 +628,7 @@ define <2 x double> @test31_commute(<2 x double> %x, <2 x double> %x1, <2 x doub ; ; SKX-LABEL: test31_commute: ; SKX: ## %bb.0: -; SKX-NEXT: vmovupd (%rdi), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x17] -; SKX-NEXT: vcmpltpd %xmm0, %xmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x08,0xc2,0xc8,0x01] +; SKX-NEXT: vcmpgtpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x0e] ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0] ; SKX-NEXT: retq ## encoding: [0xc3] @@ -675,8 +674,7 @@ define <4 x double> @test32_commute(<4 x double> %x, <4 x double> %x1, <4 x doub ; ; SKX-LABEL: test32_commute: ; SKX: ## %bb.0: -; SKX-NEXT: vmovupd (%rdi), %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x17] -; SKX-NEXT: vcmpltpd %ymm0, %ymm2, %k1 ## encoding: [0x62,0xf1,0xed,0x28,0xc2,0xc8,0x01] +; SKX-NEXT: vcmpgtpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x0e] ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0] ; SKX-NEXT: retq ## encoding: [0xc3] @@ -701,8 +699,7 @@ define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp define <8 x double> @test33_commute(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind { ; CHECK-LABEL: test33_commute: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovupd (%rdi), %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x17] -; CHECK-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01] +; CHECK-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e] ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %y = load <8 x double>, <8 x double>* %yp, align 4 @@ -748,8 +745,7 @@ define <4 x float> @test34_commute(<4 x float> %x, <4 x float> %x1, <4 x float>* ; ; SKX-LABEL: test34_commute: ; SKX: ## %bb.0: -; SKX-NEXT: vmovups (%rdi), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x17] -; SKX-NEXT: vcmpltps %xmm0, %xmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x08,0xc2,0xc8,0x01] +; SKX-NEXT: vcmpgtps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x0e] ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0] ; SKX-NEXT: retq ## encoding: [0xc3] %y = load <4 x float>, <4 x float>* %yp, align 4 @@ -794,8 +790,7 @@ define <8 x float> @test35_commute(<8 x float> %x, <8 x float> %x1, <8 x float>* ; ; SKX-LABEL: test35_commute: ; SKX: ## %bb.0: -; SKX-NEXT: vmovups (%rdi), %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x17] -; SKX-NEXT: vcmpltps %ymm0, %ymm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x28,0xc2,0xc8,0x01] +; SKX-NEXT: vcmpgtps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x0e] ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0] ; SKX-NEXT: retq ## encoding: [0xc3] @@ -820,8 +815,7 @@ define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind { ; CHECK-LABEL: test36_commute: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovups (%rdi), %zmm2 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x17] -; CHECK-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01] +; CHECK-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e] ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %y = load <16 x float>, <16 x float>* %yp, align 4 @@ -849,8 +843,7 @@ define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nou define <8 x double> @test37_commute(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind { ; CHECK-LABEL: test37_commute: ; CHECK: ## %bb.0: -; CHECK-NEXT: vbroadcastsd (%rdi), %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x17] -; CHECK-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01] +; CHECK-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e] ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -902,8 +895,7 @@ define <4 x double> @test38_commute(<4 x double> %x, <4 x double> %x1, double* % ; ; SKX-LABEL: test38_commute: ; SKX: ## %bb.0: -; SKX-NEXT: vbroadcastsd (%rdi), %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x17] -; SKX-NEXT: vcmpltpd %ymm0, %ymm2, %k1 ## encoding: [0x62,0xf1,0xed,0x28,0xc2,0xc8,0x01] +; SKX-NEXT: vcmpgtpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x0e] ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0] ; SKX-NEXT: retq ## encoding: [0xc3] @@ -959,9 +951,7 @@ define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, double* % ; ; SKX-LABEL: test39_commute: ; SKX: ## %bb.0: -; SKX-NEXT: vmovddup (%rdi), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x17] -; SKX-NEXT: ## xmm2 = mem[0,0] -; SKX-NEXT: vcmpltpd %xmm0, %xmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x08,0xc2,0xc8,0x01] +; SKX-NEXT: vcmpgtpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x0e] ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0] ; SKX-NEXT: retq ## encoding: [0xc3] @@ -994,8 +984,7 @@ define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) n define <16 x float> @test40_commute(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind { ; CHECK-LABEL: test40_commute: ; CHECK: ## %bb.0: -; CHECK-NEXT: vbroadcastss (%rdi), %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x17] -; CHECK-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01] +; CHECK-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e] ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1047,8 +1036,7 @@ define <8 x float> @test41_commute(<8 x float> %x, <8 x float> %x1, float* %p ; ; SKX-LABEL: test41_commute: ; SKX: ## %bb.0: -; SKX-NEXT: vbroadcastss (%rdi), %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x17] -; SKX-NEXT: vcmpltps %ymm0, %ymm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x28,0xc2,0xc8,0x01] +; SKX-NEXT: vcmpgtps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x0e] ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0] ; SKX-NEXT: retq ## encoding: [0xc3] @@ -1102,8 +1090,7 @@ define <4 x float> @test42_commute(<4 x float> %x, <4 x float> %x1, float* %p ; ; SKX-LABEL: test42_commute: ; SKX: ## %bb.0: -; SKX-NEXT: vbroadcastss (%rdi), %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x17] -; SKX-NEXT: vcmpltps %xmm0, %xmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x08,0xc2,0xc8,0x01] +; SKX-NEXT: vcmpgtps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x0e] ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0] ; SKX-NEXT: retq ## encoding: [0xc3] @@ -1157,8 +1144,7 @@ define <8 x double> @test43_commute(<8 x double> %x, <8 x double> %x1, double* % ; KNL: ## %bb.0: ; KNL-NEXT: vpmovsxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd2] ; KNL-NEXT: vpsllq $63, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf2,0x3f] -; KNL-NEXT: vbroadcastsd (%rdi), %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x1f] -; KNL-NEXT: vcmpltpd %zmm0, %zmm3, %k1 ## encoding: [0x62,0xf1,0xe5,0x48,0xc2,0xc8,0x01] +; KNL-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e] ; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x27,0xca] ; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; KNL-NEXT: retq ## encoding: [0xc3] @@ -1167,8 +1153,7 @@ define <8 x double> @test43_commute(<8 x double> %x, <8 x double> %x1, double* % ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpsllw $15, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x71,0xf2,0x0f] ; AVX512BW-NEXT: vpmovw2m %zmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x29,0xca] -; AVX512BW-NEXT: vbroadcastsd (%rdi), %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x17] -; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0xed,0x49,0xc2,0xc8,0x01] +; AVX512BW-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e] ; AVX512BW-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; AVX512BW-NEXT: retq ## encoding: [0xc3] ; @@ -1176,8 +1161,7 @@ define <8 x double> @test43_commute(<8 x double> %x, <8 x double> %x1, double* % ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $15, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf2,0x0f] ; SKX-NEXT: vpmovw2m %xmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xca] -; SKX-NEXT: vbroadcastsd (%rdi), %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x17] -; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0xed,0x49,0xc2,0xc8,0x01] +; SKX-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e] ; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; SKX-NEXT: retq ## encoding: [0xc3] diff --git a/test/CodeGen/X86/commute-fcmp.ll b/test/CodeGen/X86/commute-fcmp.ll index ba99a50f58c..390cec3a571 100644 --- a/test/CodeGen/X86/commute-fcmp.ll +++ b/test/CodeGen/X86/commute-fcmp.ll @@ -806,8 +806,7 @@ define <16 x i32> @commute_cmpps_eq_zmm(<16 x float>* %a0, <16 x float> %a1) { ; ; AVX512-LABEL: commute_cmpps_eq_zmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovaps (%rdi), %zmm1 -; AVX512-NEXT: vcmpeqps %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpeqps (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2d %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <16 x float>, <16 x float>* %a0 @@ -833,8 +832,7 @@ define <16 x i32> @commute_cmpps_ne_zmm(<16 x float>* %a0, <16 x float> %a1) { ; ; AVX512-LABEL: commute_cmpps_ne_zmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovaps (%rdi), %zmm1 -; AVX512-NEXT: vcmpneqps %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpneqps (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2d %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <16 x float>, <16 x float>* %a0 @@ -860,8 +858,7 @@ define <16 x i32> @commute_cmpps_ord_zmm(<16 x float>* %a0, <16 x float> %a1) { ; ; AVX512-LABEL: commute_cmpps_ord_zmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovaps (%rdi), %zmm1 -; AVX512-NEXT: vcmpordps %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpordps (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2d %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <16 x float>, <16 x float>* %a0 @@ -887,8 +884,7 @@ define <16 x i32> @commute_cmpps_uno_zmm(<16 x float>* %a0, <16 x float> %a1) { ; ; AVX512-LABEL: commute_cmpps_uno_zmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovaps (%rdi), %zmm1 -; AVX512-NEXT: vcmpunordps %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpunordps (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2d %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <16 x float>, <16 x float>* %a0 @@ -930,8 +926,7 @@ define <16 x i32> @commute_cmpps_ueq_zmm(<16 x float>* %a0, <16 x float> %a1) { ; ; AVX512-LABEL: commute_cmpps_ueq_zmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovaps (%rdi), %zmm1 -; AVX512-NEXT: vcmpeq_uqps %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpeq_uqps (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2d %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <16 x float>, <16 x float>* %a0 @@ -973,8 +968,7 @@ define <16 x i32> @commute_cmpps_one_zmm(<16 x float>* %a0, <16 x float> %a1) { ; ; AVX512-LABEL: commute_cmpps_one_zmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovaps (%rdi), %zmm1 -; AVX512-NEXT: vcmpneq_oqps %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpneq_oqps (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2d %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <16 x float>, <16 x float>* %a0 @@ -1010,8 +1004,7 @@ define <16 x i32> @commute_cmpps_lt_zmm(<16 x float>* %a0, <16 x float> %a1) { ; ; AVX512-LABEL: commute_cmpps_lt_zmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovaps (%rdi), %zmm1 -; AVX512-NEXT: vcmpltps %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpgtps (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2d %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <16 x float>, <16 x float>* %a0 @@ -1047,8 +1040,7 @@ define <16 x i32> @commute_cmpps_le_zmm(<16 x float>* %a0, <16 x float> %a1) { ; ; AVX512-LABEL: commute_cmpps_le_zmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovaps (%rdi), %zmm1 -; AVX512-NEXT: vcmpleps %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpgeps (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2d %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <16 x float>, <16 x float>* %a0 @@ -1074,8 +1066,7 @@ define <8 x i64> @commute_cmppd_eq_zmmm(<8 x double>* %a0, <8 x double> %a1) { ; ; AVX512-LABEL: commute_cmppd_eq_zmmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovapd (%rdi), %zmm1 -; AVX512-NEXT: vcmpeqpd %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2q %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <8 x double>, <8 x double>* %a0 @@ -1101,8 +1092,7 @@ define <8 x i64> @commute_cmppd_ne_zmmm(<8 x double>* %a0, <8 x double> %a1) { ; ; AVX512-LABEL: commute_cmppd_ne_zmmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovapd (%rdi), %zmm1 -; AVX512-NEXT: vcmpneqpd %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpneqpd (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2q %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <8 x double>, <8 x double>* %a0 @@ -1128,8 +1118,7 @@ define <8 x i64> @commute_cmppd_ord_zmmm(<8 x double>* %a0, <8 x double> %a1) { ; ; AVX512-LABEL: commute_cmppd_ord_zmmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovapd (%rdi), %zmm1 -; AVX512-NEXT: vcmpordpd %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpordpd (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2q %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <8 x double>, <8 x double>* %a0 @@ -1155,8 +1144,7 @@ define <8 x i64> @commute_cmppd_uno_zmmm(<8 x double>* %a0, <8 x double> %a1) { ; ; AVX512-LABEL: commute_cmppd_uno_zmmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovapd (%rdi), %zmm1 -; AVX512-NEXT: vcmpunordpd %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpunordpd (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2q %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <8 x double>, <8 x double>* %a0 @@ -1198,8 +1186,7 @@ define <8 x i64> @commute_cmppd_ueq_zmmm(<8 x double>* %a0, <8 x double> %a1) { ; ; AVX512-LABEL: commute_cmppd_ueq_zmmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovapd (%rdi), %zmm1 -; AVX512-NEXT: vcmpeq_uqpd %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpeq_uqpd (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2q %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <8 x double>, <8 x double>* %a0 @@ -1241,8 +1228,7 @@ define <8 x i64> @commute_cmppd_one_zmmm(<8 x double>* %a0, <8 x double> %a1) { ; ; AVX512-LABEL: commute_cmppd_one_zmmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovapd (%rdi), %zmm1 -; AVX512-NEXT: vcmpneq_oqpd %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpneq_oqpd (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2q %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <8 x double>, <8 x double>* %a0 @@ -1278,8 +1264,7 @@ define <8 x i64> @commute_cmppd_lt_zmmm(<8 x double>* %a0, <8 x double> %a1) { ; ; AVX512-LABEL: commute_cmppd_lt_zmmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovapd (%rdi), %zmm1 -; AVX512-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpgtpd (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2q %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <8 x double>, <8 x double>* %a0 @@ -1315,8 +1300,7 @@ define <8 x i64> @commute_cmppd_le_zmmm(<8 x double>* %a0, <8 x double> %a1) { ; ; AVX512-LABEL: commute_cmppd_le_zmmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovapd (%rdi), %zmm1 -; AVX512-NEXT: vcmplepd %zmm0, %zmm1, %k0 +; AVX512-NEXT: vcmpgepd (%rdi), %zmm0, %k0 ; AVX512-NEXT: vpmovm2q %k0, %zmm0 ; AVX512-NEXT: retq %1 = load <8 x double>, <8 x double>* %a0