[AVX512] Enable lowering of bitwise logic operations (AND, OR, XOR)

Added lowering tests.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224132 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Robert Khasanov 2014-12-12 17:02:18 +00:00
parent fe6c7ee0f0
commit 5dc8ac87f1
4 changed files with 247 additions and 0 deletions

View File

@ -1570,6 +1570,13 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal);
setOperationAction(ISD::AND, MVT::v8i32, Legal);
setOperationAction(ISD::OR, MVT::v8i32, Legal);
setOperationAction(ISD::XOR, MVT::v8i32, Legal);
setOperationAction(ISD::AND, MVT::v4i32, Legal);
setOperationAction(ISD::OR, MVT::v4i32, Legal);
setOperationAction(ISD::XOR, MVT::v4i32, Legal);
}
// SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion

View File

@ -2922,6 +2922,7 @@ let isCodeGenOnly = 1 in {
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem,
[(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
@ -2952,6 +2953,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
(loadv2i64 addr:$src2)))], 0>,
PD, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,

View File

@ -0,0 +1,101 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; CHECK-LABEL: vpandd
; CHECK: vpandd %zmm
; CHECK: ret
; 512-bit integer AND on <16 x i32>; the FileCheck lines above expect vpandd on zmm registers.
define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1,
                             i32 1, i32 1, i32 1, i32 1,
                             i32 1, i32 1, i32 1, i32 1,
                             i32 1, i32 1, i32 1, i32 1>
  %anded = and <16 x i32> %inc, %b
  ret <16 x i32> %anded
}
; CHECK-LABEL: vpord
; CHECK: vpord %zmm
; CHECK: ret
; 512-bit integer OR on <16 x i32>; the FileCheck lines above expect vpord on zmm registers.
define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1,
                             i32 1, i32 1, i32 1, i32 1,
                             i32 1, i32 1, i32 1, i32 1,
                             i32 1, i32 1, i32 1, i32 1>
  %ored = or <16 x i32> %inc, %b
  ret <16 x i32> %ored
}
; CHECK-LABEL: vpxord
; CHECK: vpxord %zmm
; CHECK: ret
; 512-bit integer XOR on <16 x i32>; the FileCheck lines above expect vpxord on zmm registers.
define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1,
                             i32 1, i32 1, i32 1, i32 1,
                             i32 1, i32 1, i32 1, i32 1,
                             i32 1, i32 1, i32 1, i32 1>
  %xored = xor <16 x i32> %inc, %b
  ret <16 x i32> %xored
}
; CHECK-LABEL: vpandq
; CHECK: vpandq %zmm
; CHECK: ret
; 512-bit integer AND on <8 x i64>; the FileCheck lines above expect vpandq on zmm registers.
define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1,
                            i64 1, i64 1, i64 1, i64 1>
  %anded = and <8 x i64> %inc, %b
  ret <8 x i64> %anded
}
; CHECK-LABEL: vporq
; CHECK: vporq %zmm
; CHECK: ret
; 512-bit integer OR on <8 x i64>; the FileCheck lines above expect vporq on zmm registers.
define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1,
                            i64 1, i64 1, i64 1, i64 1>
  %ored = or <8 x i64> %inc, %b
  ret <8 x i64> %ored
}
; CHECK-LABEL: vpxorq
; CHECK: vpxorq %zmm
; CHECK: ret
; 512-bit integer XOR on <8 x i64>; the FileCheck lines above expect vpxorq on zmm registers.
define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1,
                            i64 1, i64 1, i64 1, i64 1>
  %xored = xor <8 x i64> %inc, %b
  ret <8 x i64> %xored
}
; CHECK-LABEL: orq_broadcast
; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK: ret
; OR of an <8 x i64> argument with a constant whose lanes are all 2. The
; FileCheck lines above expect vporq with a {1to8} broadcast memory operand
; addressed relative to %rip.
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
  %ored = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2,
                            i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %ored
}
; CHECK-LABEL: andd512fold
; CHECK: vpandd (%
; CHECK: ret
; AND of a <16 x i32> argument with a vector loaded through %x. The FileCheck
; lines above expect vpandd to take the loaded value as a memory operand.
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
entry:
  ; Vector load with 4-byte alignment.
  %loaded = load <16 x i32>* %x, align 4
  %anded = and <16 x i32> %y, %loaded
  ret <16 x i32> %anded
}
; CHECK-LABEL: andqbrst
; CHECK: vpandq (%rdi){1to8}, %zmm
; CHECK: ret
; AND of an <8 x i64> argument with a scalar i64 loaded through %ap and
; replicated to every lane. The FileCheck lines above expect vpandq with a
; (%rdi){1to8} broadcast memory operand.
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
entry:
  %scalar = load i64* %ap, align 8
  ; Replicate the scalar: place it in lane 0, then shuffle with an all-zero mask.
  %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> zeroinitializer
  %anded = and <8 x i64> %p1, %splat
  ret <8 x i64> %anded
}

View File

@ -0,0 +1,137 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl | FileCheck %s
; 256-bit
; CHECK-LABEL: vpandd256
; CHECK: vpandd %ymm
; CHECK: ret
; 256-bit integer AND on <8 x i32>; the FileCheck lines above expect vpandd on ymm registers.
define <8 x i32> @vpandd256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1,
                            i32 1, i32 1, i32 1, i32 1>
  %anded = and <8 x i32> %inc, %b
  ret <8 x i32> %anded
}
; CHECK-LABEL: vpord256
; CHECK: vpord %ymm
; CHECK: ret
; 256-bit integer OR on <8 x i32>; the FileCheck lines above expect vpord on ymm registers.
define <8 x i32> @vpord256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1,
                            i32 1, i32 1, i32 1, i32 1>
  %ored = or <8 x i32> %inc, %b
  ret <8 x i32> %ored
}
; CHECK-LABEL: vpxord256
; CHECK: vpxord %ymm
; CHECK: ret
; 256-bit integer XOR on <8 x i32>; the FileCheck lines above expect vpxord on ymm registers.
define <8 x i32> @vpxord256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1,
                            i32 1, i32 1, i32 1, i32 1>
  %xored = xor <8 x i32> %inc, %b
  ret <8 x i32> %xored
}
; CHECK-LABEL: vpandq256
; CHECK: vpandq %ymm
; CHECK: ret
; 256-bit integer AND on <4 x i64>; the FileCheck lines above expect vpandq on ymm registers.
define <4 x i64> @vpandq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <4 x i64> %a, <i64 1, i64 1,
                            i64 1, i64 1>
  %anded = and <4 x i64> %inc, %b
  ret <4 x i64> %anded
}
; CHECK-LABEL: vporq256
; CHECK: vporq %ymm
; CHECK: ret
; 256-bit integer OR on <4 x i64>; the FileCheck lines above expect vporq on ymm registers.
define <4 x i64> @vporq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <4 x i64> %a, <i64 1, i64 1,
                            i64 1, i64 1>
  %ored = or <4 x i64> %inc, %b
  ret <4 x i64> %ored
}
; CHECK-LABEL: vpxorq256
; CHECK: vpxorq %ymm
; CHECK: ret
; 256-bit integer XOR on <4 x i64>; the FileCheck lines above expect vpxorq on ymm registers.
define <4 x i64> @vpxorq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <4 x i64> %a, <i64 1, i64 1,
                            i64 1, i64 1>
  %xored = xor <4 x i64> %inc, %b
  ret <4 x i64> %xored
}
; 128-bit
; CHECK-LABEL: vpandd128
; CHECK: vpandd %xmm
; CHECK: ret
; 128-bit integer AND on <4 x i32>; the FileCheck lines above expect vpandd on xmm registers.
define <4 x i32> @vpandd128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <4 x i32> %a, <i32 1, i32 1,
                            i32 1, i32 1>
  %anded = and <4 x i32> %inc, %b
  ret <4 x i32> %anded
}
; CHECK-LABEL: vpord128
; CHECK: vpord %xmm
; CHECK: ret
; 128-bit integer OR on <4 x i32>; the FileCheck lines above expect vpord on xmm registers.
define <4 x i32> @vpord128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <4 x i32> %a, <i32 1, i32 1,
                            i32 1, i32 1>
  %ored = or <4 x i32> %inc, %b
  ret <4 x i32> %ored
}
; CHECK-LABEL: vpxord128
; CHECK: vpxord %xmm
; CHECK: ret
; 128-bit integer XOR on <4 x i32>; the FileCheck lines above expect vpxord on xmm registers.
define <4 x i32> @vpxord128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <4 x i32> %a, <i32 1, i32 1,
                            i32 1, i32 1>
  %xored = xor <4 x i32> %inc, %b
  ret <4 x i32> %xored
}
; CHECK-LABEL: vpandq128
; CHECK: vpandq %xmm
; CHECK: ret
; 128-bit integer AND on <2 x i64>; the FileCheck lines above expect vpandq on xmm registers.
define <2 x i64> @vpandq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <2 x i64> %a, <i64 1,
                            i64 1>
  %anded = and <2 x i64> %inc, %b
  ret <2 x i64> %anded
}
; CHECK-LABEL: vporq128
; CHECK: vporq %xmm
; CHECK: ret
; 128-bit integer OR on <2 x i64>; the FileCheck lines above expect vporq on xmm registers.
define <2 x i64> @vporq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <2 x i64> %a, <i64 1,
                            i64 1>
  %ored = or <2 x i64> %inc, %b
  ret <2 x i64> %ored
}
; CHECK-LABEL: vpxorq128
; CHECK: vpxorq %xmm
; CHECK: ret
; 128-bit integer XOR on <2 x i64>; the FileCheck lines above expect vpxorq on xmm registers.
define <2 x i64> @vpxorq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; The add ahead of the logic op is there to force the execution domain.
  %inc = add <2 x i64> %a, <i64 1,
                            i64 1>
  %xored = xor <2 x i64> %inc, %b
  ret <2 x i64> %xored
}