[AVX-512] Add patterns to select masked logical operations if the select has a floating point type.

This is needed in order to replace the masked floating point logical op intrinsics with native IR.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280195 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2016-08-31 05:37:52 +00:00
parent 2699e28795
commit f903ac6796
3 changed files with 306 additions and 192 deletions

View File

@ -4235,6 +4235,216 @@ defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, HasDQI,
defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, HasDQI,
SSE_ALU_ITINS_P, 1>;
// Patterns catch floating point selects with bitcasted integer logic ops.
// Each pattern matches (vselect $mask, (bitconvert (logic-op $src1, $src2)),
// <false-operand>) where the select has an FP vector type, and emits the
// corresponding masked AVX-512 integer logical instruction so the select is
// folded into the instruction's mask operand.  The "rrk" forms merge with
// $src0 (the select's false operand becomes the passthru); the "rrkz" forms
// match a zero false operand (immAllZerosV) and use zero-masking.
// D-suffixed opcodes are used for f32 element types, Q-suffixed for f64.
// 128-bit and 256-bit forms require VLX.
let Predicates = [HasVLX] in {
// v4f32 (128-bit), merge-masking.
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPANDDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPXORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPANDNDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1,
VR128X:$src2)>;
// v4f32 (128-bit), zero-masking: false operand is an all-zeros vector.
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPANDDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPXORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPANDNDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
// v2f64 (128-bit), merge-masking; quadword (Q) opcodes.
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPANDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPXORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPANDNQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
// v2f64 (128-bit), zero-masking.
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPANDQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPXORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPANDNQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
// v8f32 (256-bit), merge-masking.
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPANDDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPXORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPANDNDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
// v8f32 (256-bit), zero-masking.
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPANDDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPXORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPANDNDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
// v4f64 (256-bit), merge-masking.
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPANDQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPXORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPANDNQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
// v4f64 (256-bit), zero-masking.
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPANDQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPXORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPANDNQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
}
// 512-bit variants of the patterns above: fold a floating point vselect of a
// bitcasted integer logic op into the masked 512-bit logical instruction.
// "rrk" forms merge-mask with $src0; "rrkz" forms match an all-zeros false
// operand and zero-mask.  D opcodes for f32 lanes, Q opcodes for f64 lanes.
// Only the baseline AVX512F predicate is needed for ZMM forms.
let Predicates = [HasAVX512] in {
// v16f32, merge-masking.
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPANDDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPXORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPANDNDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
// v16f32, zero-masking.
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPANDDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPXORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPANDNDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
// v8f64, merge-masking.
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPANDQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPXORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPANDNQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
// v8f64, zero-masking.
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPANDQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPXORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPANDNQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
}
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),

View File

@ -498,16 +498,14 @@ entry:
define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_mask_xor_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpxorq %zmm2, %zmm1, %zmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
; KNL-NEXT: vpxorq %zmm2, %zmm1, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_mask_xor_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@ -522,16 +520,14 @@ entry:
define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_maskz_xor_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_maskz_xor_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@ -546,16 +542,14 @@ entry:
define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_mask_xor_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpxorq %zmm2, %zmm1, %zmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
; KNL-NEXT: vpxord %zmm2, %zmm1, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_mask_xor_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm1
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@ -570,16 +564,14 @@ entry:
define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_maskz_xor_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpxord %zmm1, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_maskz_xor_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@ -594,16 +586,14 @@ entry:
define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_mask_or_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vporq %zmm1, %zmm2, %zmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
; KNL-NEXT: vporq %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_mask_or_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@ -618,16 +608,14 @@ entry:
define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_maskz_or_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vporq %zmm0, %zmm1, %zmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vporq %zmm0, %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_maskz_or_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@ -642,16 +630,14 @@ entry:
define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_mask_or_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vporq %zmm1, %zmm2, %zmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
; KNL-NEXT: vpord %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_mask_or_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vorps %zmm1, %zmm2, %zmm1
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@ -666,16 +652,14 @@ entry:
define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_maskz_or_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vporq %zmm0, %zmm1, %zmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpord %zmm0, %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_maskz_or_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@ -690,16 +674,14 @@ entry:
define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_mask_and_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq %zmm1, %zmm2, %zmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
; KNL-NEXT: vpandq %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_mask_and_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@ -714,16 +696,14 @@ entry:
define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_maskz_and_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_maskz_and_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@ -738,16 +718,14 @@ entry:
define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_mask_and_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq %zmm1, %zmm2, %zmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
; KNL-NEXT: vpandd %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_mask_and_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandps %zmm1, %zmm2, %zmm1
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@ -762,16 +740,14 @@ entry:
define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_maskz_and_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpandd %zmm0, %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_maskz_and_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@ -786,16 +762,14 @@ entry:
define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_mask_andnot_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandnq %zmm2, %zmm1, %zmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
; KNL-NEXT: vpandnq %zmm2, %zmm1, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_mask_andnot_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@ -811,16 +785,14 @@ entry:
define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_maskz_andnot_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandnq %zmm1, %zmm0, %zmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpandnq %zmm1, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_maskz_andnot_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@ -836,16 +808,14 @@ entry:
define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_mask_andnot_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandnq %zmm2, %zmm1, %zmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
; KNL-NEXT: vpandnd %zmm2, %zmm1, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_mask_andnot_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm1
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@ -861,16 +831,14 @@ entry:
define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_maskz_andnot_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandnq %zmm1, %zmm0, %zmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpandnd %zmm1, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm512_maskz_andnot_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>

View File

@ -222,16 +222,14 @@ entry:
define <4 x double> @test_mm256_mask_andnot_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_mask_andnot_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandnq %ymm2, %ymm1, %ymm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
; KNL-NEXT: vpandnq %ymm2, %ymm1, %ymm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_mask_andnot_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandnpd %ymm2, %ymm1, %ymm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
; SKX-NEXT: vandnpd %ymm2, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x double> %__A to <4 x i64>
@ -248,16 +246,14 @@ entry:
define <4 x double> @test_mm256_maskz_andnot_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_maskz_andnot_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandnq %ymm1, %ymm0, %ymm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_maskz_andnot_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x double> %__A to <4 x i64>
@ -274,16 +270,14 @@ entry:
define <2 x double> @test_mm_mask_andnot_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_mask_andnot_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandnq %xmm2, %xmm1, %xmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
; KNL-NEXT: vpandnq %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_mask_andnot_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandnpd %xmm2, %xmm1, %xmm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
; SKX-NEXT: vandnpd %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <2 x double> %__A to <2 x i64>
@ -300,16 +294,14 @@ entry:
define <2 x double> @test_mm_maskz_andnot_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_maskz_andnot_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandnq %xmm1, %xmm0, %xmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_maskz_andnot_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <2 x double> %__A to <2 x i64>
@ -326,16 +318,14 @@ entry:
define <8 x float> @test_mm256_mask_andnot_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_mask_andnot_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandnq %ymm2, %ymm1, %ymm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1}
; KNL-NEXT: vpandnd %ymm2, %ymm1, %ymm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_mask_andnot_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandnps %ymm2, %ymm1, %ymm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1}
; SKX-NEXT: vandnps %ymm2, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x float> %__A to <8 x i32>
@ -351,16 +341,14 @@ entry:
define <8 x float> @test_mm256_maskz_andnot_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_maskz_andnot_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandnq %ymm1, %ymm0, %ymm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_maskz_andnot_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x float> %__A to <8 x i32>
@ -376,16 +364,14 @@ entry:
define <4 x float> @test_mm_mask_andnot_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_mask_andnot_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandnq %xmm2, %xmm1, %xmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1}
; KNL-NEXT: vpandnd %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_mask_andnot_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandnps %xmm2, %xmm1, %xmm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1}
; SKX-NEXT: vandnps %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x float> %__A to <4 x i32>
@ -402,16 +388,14 @@ entry:
define <4 x float> @test_mm_maskz_andnot_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_maskz_andnot_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandnq %xmm1, %xmm0, %xmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_maskz_andnot_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x float> %__A to <4 x i32>
@ -428,16 +412,14 @@ entry:
define <4 x double> @test_mm256_mask_and_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_mask_and_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq %ymm1, %ymm2, %ymm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
; KNL-NEXT: vpandq %ymm1, %ymm2, %ymm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_mask_and_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandpd %ymm1, %ymm2, %ymm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
; SKX-NEXT: vandpd %ymm1, %ymm2, %ymm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x double> %__A to <4 x i64>
@ -453,16 +435,14 @@ entry:
define <4 x double> @test_mm256_maskz_and_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_maskz_and_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq %ymm0, %ymm1, %ymm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT: vpandq %ymm0, %ymm1, %ymm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_maskz_and_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandpd %ymm0, %ymm1, %ymm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT: vandpd %ymm0, %ymm1, %ymm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x double> %__A to <4 x i64>
@ -478,16 +458,14 @@ entry:
define <2 x double> @test_mm_mask_and_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_mask_and_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq %xmm1, %xmm2, %xmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
; KNL-NEXT: vpandq %xmm1, %xmm2, %xmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_mask_and_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandpd %xmm1, %xmm2, %xmm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
; SKX-NEXT: vandpd %xmm1, %xmm2, %xmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <2 x double> %__A to <2 x i64>
@ -503,16 +481,14 @@ entry:
define <2 x double> @test_mm_maskz_and_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_maskz_and_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq %xmm0, %xmm1, %xmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: vpandq %xmm0, %xmm1, %xmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_maskz_and_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandpd %xmm0, %xmm1, %xmm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: vandpd %xmm0, %xmm1, %xmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <2 x double> %__A to <2 x i64>
@ -528,16 +504,14 @@ entry:
define <8 x float> @test_mm256_mask_and_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_mask_and_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq %ymm1, %ymm2, %ymm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1}
; KNL-NEXT: vpandd %ymm1, %ymm2, %ymm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_mask_and_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandps %ymm1, %ymm2, %ymm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1}
; SKX-NEXT: vandps %ymm1, %ymm2, %ymm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x float> %__A to <8 x i32>
@ -552,16 +526,14 @@ entry:
define <8 x float> @test_mm256_maskz_and_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_maskz_and_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq %ymm0, %ymm1, %ymm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT: vpandd %ymm0, %ymm1, %ymm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_maskz_and_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandps %ymm0, %ymm1, %ymm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT: vandps %ymm0, %ymm1, %ymm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x float> %__A to <8 x i32>
@ -576,16 +548,14 @@ entry:
define <4 x float> @test_mm_mask_and_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_mask_and_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq %xmm1, %xmm2, %xmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1}
; KNL-NEXT: vpandd %xmm1, %xmm2, %xmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_mask_and_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandps %xmm1, %xmm2, %xmm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1}
; SKX-NEXT: vandps %xmm1, %xmm2, %xmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x float> %__A to <4 x i32>
@ -601,16 +571,14 @@ entry:
define <4 x float> @test_mm_maskz_and_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_maskz_and_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq %xmm0, %xmm1, %xmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: vpandd %xmm0, %xmm1, %xmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_maskz_and_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandps %xmm0, %xmm1, %xmm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: vandps %xmm0, %xmm1, %xmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x float> %__A to <4 x i32>
@ -626,16 +594,14 @@ entry:
define <4 x double> @test_mm256_mask_xor_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_mask_xor_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpxorq %ymm2, %ymm1, %ymm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
; KNL-NEXT: vpxorq %ymm2, %ymm1, %ymm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_mask_xor_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vxorpd %ymm2, %ymm1, %ymm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
; SKX-NEXT: vxorpd %ymm2, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x double> %__A to <4 x i64>
@ -651,16 +617,14 @@ entry:
define <4 x double> @test_mm256_maskz_xor_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_maskz_xor_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpxorq %ymm1, %ymm0, %ymm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT: vpxorq %ymm1, %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_maskz_xor_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x double> %__A to <4 x i64>
@ -676,16 +640,14 @@ entry:
define <2 x double> @test_mm_mask_xor_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_mask_xor_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpxorq %xmm2, %xmm1, %xmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
; KNL-NEXT: vpxorq %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_mask_xor_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vxorpd %xmm2, %xmm1, %xmm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
; SKX-NEXT: vxorpd %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <2 x double> %__A to <2 x i64>
@ -701,16 +663,14 @@ entry:
define <2 x double> @test_mm_maskz_xor_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_maskz_xor_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpxorq %xmm1, %xmm0, %xmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: vpxorq %xmm1, %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_maskz_xor_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <2 x double> %__A to <2 x i64>
@ -726,16 +686,14 @@ entry:
define <8 x float> @test_mm256_mask_xor_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_mask_xor_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpxorq %ymm2, %ymm1, %ymm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1}
; KNL-NEXT: vpxord %ymm2, %ymm1, %ymm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_mask_xor_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vxorps %ymm2, %ymm1, %ymm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1}
; SKX-NEXT: vxorps %ymm2, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x float> %__A to <8 x i32>
@ -750,16 +708,14 @@ entry:
define <8 x float> @test_mm256_maskz_xor_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_maskz_xor_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpxorq %ymm1, %ymm0, %ymm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_maskz_xor_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x float> %__A to <8 x i32>
@ -774,16 +730,14 @@ entry:
define <4 x float> @test_mm_mask_xor_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_mask_xor_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpxorq %xmm2, %xmm1, %xmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1}
; KNL-NEXT: vpxord %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_mask_xor_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vxorps %xmm2, %xmm1, %xmm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1}
; SKX-NEXT: vxorps %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x float> %__A to <4 x i32>
@ -799,16 +753,14 @@ entry:
define <4 x float> @test_mm_maskz_xor_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_maskz_xor_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpxorq %xmm1, %xmm0, %xmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_maskz_xor_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x float> %__A to <4 x i32>
@ -824,16 +776,14 @@ entry:
define <4 x double> @test_mm256_mask_or_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_mask_or_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vporq %ymm1, %ymm2, %ymm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
; KNL-NEXT: vporq %ymm1, %ymm2, %ymm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_mask_or_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vorpd %ymm1, %ymm2, %ymm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
; SKX-NEXT: vorpd %ymm1, %ymm2, %ymm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x double> %__A to <4 x i64>
@ -849,16 +799,14 @@ entry:
define <4 x double> @test_mm256_maskz_or_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_maskz_or_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vporq %ymm0, %ymm1, %ymm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT: vporq %ymm0, %ymm1, %ymm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_maskz_or_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x double> %__A to <4 x i64>
@ -874,16 +822,14 @@ entry:
define <2 x double> @test_mm_mask_or_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_mask_or_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vporq %xmm1, %xmm2, %xmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
; KNL-NEXT: vporq %xmm1, %xmm2, %xmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_mask_or_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vorpd %xmm1, %xmm2, %xmm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
; SKX-NEXT: vorpd %xmm1, %xmm2, %xmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <2 x double> %__A to <2 x i64>
@ -899,16 +845,14 @@ entry:
define <2 x double> @test_mm_maskz_or_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_maskz_or_pd:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vporq %xmm0, %xmm1, %xmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: vporq %xmm0, %xmm1, %xmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_maskz_or_pd:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <2 x double> %__A to <2 x i64>
@ -924,16 +868,14 @@ entry:
define <8 x float> @test_mm256_mask_or_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_mask_or_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vporq %ymm1, %ymm2, %ymm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1}
; KNL-NEXT: vpord %ymm1, %ymm2, %ymm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_mask_or_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vorps %ymm1, %ymm2, %ymm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1}
; SKX-NEXT: vorps %ymm1, %ymm2, %ymm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x float> %__A to <8 x i32>
@ -948,16 +890,14 @@ entry:
define <8 x float> @test_mm256_maskz_or_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_maskz_or_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vporq %ymm0, %ymm1, %ymm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT: vpord %ymm0, %ymm1, %ymm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm256_maskz_or_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x float> %__A to <8 x i32>
@ -972,16 +912,14 @@ entry:
define <4 x float> @test_mm_mask_or_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_mask_or_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vporq %xmm1, %xmm2, %xmm1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1}
; KNL-NEXT: vpord %xmm1, %xmm2, %xmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_mask_or_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vorps %xmm1, %xmm2, %xmm1
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1}
; SKX-NEXT: vorps %xmm1, %xmm2, %xmm0 {%k1}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x float> %__A to <4 x i32>
@ -997,16 +935,14 @@ entry:
define <4 x float> @test_mm_maskz_or_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_maskz_or_ps:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vporq %xmm0, %xmm1, %xmm0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: vpord %xmm0, %xmm1, %xmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: test_mm_maskz_or_ps:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 {%k1} {z}
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x float> %__A to <4 x i32>