mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-23 19:59:48 +00:00
Add support for breaking 256-bit v16i16 and v32i8 VSETCC into two 128-bit ones, avoiding scalarization. Add VEX forms of pcmpeqq and pcmpgtq. Fixes more cases for PR10712.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138321 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
07097ddbc0
commit
a534780da0
@ -989,6 +989,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::SRA, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
|
||||
|
||||
setOperationAction(ISD::VSETCC, MVT::v32i8, Custom);
|
||||
setOperationAction(ISD::VSETCC, MVT::v16i16, Custom);
|
||||
setOperationAction(ISD::VSETCC, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::VSETCC, MVT::v4i64, Custom);
|
||||
|
||||
|
@ -4889,6 +4889,11 @@ let Predicates = [HasAVX] in {
|
||||
0>, VEX_4V;
|
||||
defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
|
||||
0>, VEX_4V;
|
||||
|
||||
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
|
||||
(VPCMPEQQrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
|
||||
(VPCMPEQQrm VR128:$src1, addr:$src2)>;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
@ -5099,9 +5104,16 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
|
||||
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in
|
||||
let Predicates = [HasAVX] in {
|
||||
defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
|
||||
0>, VEX_4V;
|
||||
|
||||
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
|
||||
(VPCMPGTQrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
|
||||
(VPCMPGTQrm VR128:$src1, addr:$src2)>;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
|
||||
|
||||
@ -5660,6 +5672,11 @@ def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))),
|
||||
def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))),
|
||||
(v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;
|
||||
|
||||
def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))),
|
||||
(v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
|
||||
def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))),
|
||||
(v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VMASKMOV - Conditional SIMD Packed Loads and Stores
|
||||
|
@ -53,3 +53,80 @@ define <8 x i32> @int256-cmp(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
ret <8 x i32> %x
|
||||
}
|
||||
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK-NEXT: vpcmpgtq %xmm
|
||||
; CHECK-NEXT: vpcmpgtq %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
define <4 x i64> @v4i64-cmp(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
%bincmp = icmp slt <4 x i64> %i, %j
|
||||
%x = sext <4 x i1> %bincmp to <4 x i64>
|
||||
ret <4 x i64> %x
|
||||
}
|
||||
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK-NEXT: vpcmpgtw %xmm
|
||||
; CHECK-NEXT: vpcmpgtw %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
define <16 x i16> @v16i16-cmp(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
|
||||
%bincmp = icmp slt <16 x i16> %i, %j
|
||||
%x = sext <16 x i1> %bincmp to <16 x i16>
|
||||
ret <16 x i16> %x
|
||||
}
|
||||
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK-NEXT: vpcmpgtb %xmm
|
||||
; CHECK-NEXT: vpcmpgtb %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
define <32 x i8> @v32i8-cmp(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
%bincmp = icmp slt <32 x i8> %i, %j
|
||||
%x = sext <32 x i1> %bincmp to <32 x i8>
|
||||
ret <32 x i8> %x
|
||||
}
|
||||
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK-NEXT: vpcmpeqd %xmm
|
||||
; CHECK-NEXT: vpcmpeqd %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
define <8 x i32> @int256-cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
%bincmp = icmp eq <8 x i32> %i, %j
|
||||
%x = sext <8 x i1> %bincmp to <8 x i32>
|
||||
ret <8 x i32> %x
|
||||
}
|
||||
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK-NEXT: vpcmpeqq %xmm
|
||||
; CHECK-NEXT: vpcmpeqq %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
define <4 x i64> @v4i64-cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
%bincmp = icmp eq <4 x i64> %i, %j
|
||||
%x = sext <4 x i1> %bincmp to <4 x i64>
|
||||
ret <4 x i64> %x
|
||||
}
|
||||
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK-NEXT: vpcmpeqw %xmm
|
||||
; CHECK-NEXT: vpcmpeqw %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
define <16 x i16> @v16i16-cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
|
||||
%bincmp = icmp eq <16 x i16> %i, %j
|
||||
%x = sext <16 x i1> %bincmp to <16 x i16>
|
||||
ret <16 x i16> %x
|
||||
}
|
||||
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK: vextractf128 $1
|
||||
; CHECK-NEXT: vpcmpeqb %xmm
|
||||
; CHECK-NEXT: vpcmpeqb %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
define <32 x i8> @v32i8-cmpeq(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
%bincmp = icmp eq <32 x i8> %i, %j
|
||||
%x = sext <32 x i1> %bincmp to <32 x i8>
|
||||
ret <32 x i8> %x
|
||||
}
|
||||
|
||||
|
@ -1,10 +1,8 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
||||
|
||||
; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd
|
||||
|
||||
; CHECK: vextractf128 $0
|
||||
; CHECK-NEXT: punpcklbw
|
||||
; CHECK-NEXT: punpckhbw
|
||||
; CHECK: vpunpcklbw %xmm
|
||||
; CHECK-NEXT: vpunpckhbw %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
; CHECK-NEXT: vpermilps $85
|
||||
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
|
||||
@ -13,8 +11,7 @@ entry:
|
||||
ret <32 x i8> %shuffle
|
||||
}
|
||||
|
||||
; CHECK: vextractf128 $0
|
||||
; CHECK-NEXT: punpckhwd
|
||||
; CHECK: vpunpckhwd %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
; CHECK-NEXT: vpermilps $85
|
||||
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
|
||||
|
Loading…
Reference in New Issue
Block a user