mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-20 11:08:27 +00:00
Teach the DAGCombiner how to fold 'vselect' dag nodes according
to the following two rules:
  1) fold (vselect (build_vector AllOnes), A, B) -> A
  2) fold (vselect (build_vector AllZeros), A, B) -> B

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198777 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2b5313d26b
commit
638e97f135
@ -4402,6 +4402,13 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
|
||||
}
|
||||
|
||||
// Fold (vselect (build_vector all_ones), N1, N2) -> N1
|
||||
if (ISD::isBuildVectorAllOnes(N0.getNode()))
|
||||
return N1;
|
||||
// Fold (vselect (build_vector all_zeros), N1, N2) -> N2
|
||||
if (ISD::isBuildVectorAllZeros(N0.getNode()))
|
||||
return N2;
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -40,16 +40,16 @@ define <16 x i8> @xor16xi8(<16 x i8> %a, <16 x i8> %b) {
|
||||
|
||||
define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) {
|
||||
;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
%tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
|
||||
%tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 >
|
||||
%tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0 >
|
||||
%tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1 >
|
||||
%tmp3 = or <8 x i8> %tmp1, %tmp2
|
||||
ret <8 x i8> %tmp3
|
||||
}
|
||||
|
||||
define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) {
|
||||
;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
%tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
|
||||
%tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 >
|
||||
;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
%tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0 >
|
||||
%tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1 >
|
||||
%tmp3 = or <16 x i8> %tmp1, %tmp2
|
||||
ret <16 x i8> %tmp3
|
||||
}
|
||||
@ -444,10 +444,11 @@ define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b) {
|
||||
%tmp2 = or <2 x i64> %a, %tmp1
|
||||
ret <2 x i64> %tmp2
|
||||
}
|
||||
|
||||
define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
|
||||
;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
%tmp1 = and <2 x i32> %a, < i32 -1, i32 -1 >
|
||||
%tmp2 = and <2 x i32> %b, < i32 0, i32 0 >
|
||||
%tmp1 = and <2 x i32> %a, < i32 -1, i32 0 >
|
||||
%tmp2 = and <2 x i32> %b, < i32 0, i32 -1 >
|
||||
%tmp3 = or <2 x i32> %tmp1, %tmp2
|
||||
ret <2 x i32> %tmp3
|
||||
}
|
||||
@ -455,40 +456,40 @@ define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
|
||||
|
||||
define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) {
|
||||
;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
%tmp1 = and <4 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1 >
|
||||
%tmp2 = and <4 x i16> %b, < i16 0, i16 0,i16 0, i16 0 >
|
||||
%tmp1 = and <4 x i16> %a, < i16 -1, i16 0, i16 -1,i16 0 >
|
||||
%tmp2 = and <4 x i16> %b, < i16 0, i16 -1,i16 0, i16 -1 >
|
||||
%tmp3 = or <4 x i16> %tmp1, %tmp2
|
||||
ret <4 x i16> %tmp3
|
||||
}
|
||||
|
||||
define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) {
|
||||
;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
%tmp1 = and <1 x i64> %a, < i64 -1 >
|
||||
%tmp2 = and <1 x i64> %b, < i64 0 >
|
||||
%tmp1 = and <1 x i64> %a, < i64 -16 >
|
||||
%tmp2 = and <1 x i64> %b, < i64 15 >
|
||||
%tmp3 = or <1 x i64> %tmp1, %tmp2
|
||||
ret <1 x i64> %tmp3
|
||||
}
|
||||
|
||||
define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) {
|
||||
;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
%tmp1 = and <4 x i32> %a, < i32 -1, i32 -1, i32 -1, i32 -1 >
|
||||
%tmp2 = and <4 x i32> %b, < i32 0, i32 0, i32 0, i32 0 >
|
||||
%tmp1 = and <4 x i32> %a, < i32 -1, i32 0, i32 -1, i32 0 >
|
||||
%tmp2 = and <4 x i32> %b, < i32 0, i32 -1, i32 0, i32 -1 >
|
||||
%tmp3 = or <4 x i32> %tmp1, %tmp2
|
||||
ret <4 x i32> %tmp3
|
||||
}
|
||||
|
||||
define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
|
||||
;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
%tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1, i16 -1, i16 -1, i16 -1,i16 -1 >
|
||||
%tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0 >
|
||||
%tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 0,i16 0, i16 -1, i16 -1, i16 0,i16 0 >
|
||||
%tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 -1, i16 -1, i16 0, i16 0, i16 -1, i16 -1 >
|
||||
%tmp3 = or <8 x i16> %tmp1, %tmp2
|
||||
ret <8 x i16> %tmp3
|
||||
}
|
||||
|
||||
define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) {
|
||||
;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
%tmp1 = and <2 x i64> %a, < i64 -1, i64 -1 >
|
||||
%tmp2 = and <2 x i64> %b, < i64 0, i64 0 >
|
||||
%tmp1 = and <2 x i64> %a, < i64 -1, i64 0 >
|
||||
%tmp2 = and <2 x i64> %b, < i64 0, i64 -1 >
|
||||
%tmp3 = or <2 x i64> %tmp1, %tmp2
|
||||
ret <2 x i64> %tmp3
|
||||
}
|
||||
|
@ -1,12 +1,10 @@
|
||||
; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
|
||||
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
|
||||
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Make sure that we don't crash when legalizng vselect and vsetcc and that
|
||||
; Make sure that we don't crash when legalizing vselect and vsetcc and that
|
||||
; we are able to generate vector blend instructions.
|
||||
|
||||
; CHECK: simple_widen
|
||||
; CHECK: blend
|
||||
; CHECK-LABEL: simple_widen
|
||||
; CHECK-NOT: blend
|
||||
; CHECK: ret
|
||||
define void @simple_widen() {
|
||||
entry:
|
||||
@ -15,7 +13,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: complex_inreg_work
|
||||
; CHECK-LABEL: complex_inreg_work
|
||||
; CHECK: blend
|
||||
; CHECK: ret
|
||||
|
||||
@ -27,8 +25,8 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: zero_test
|
||||
; CHECK: blend
|
||||
; CHECK-LABEL: zero_test
|
||||
; CHECK: xorps %xmm0, %xmm0
|
||||
; CHECK: ret
|
||||
|
||||
define void @zero_test() {
|
||||
@ -38,7 +36,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: full_test
|
||||
; CHECK-LABEL: full_test
|
||||
; CHECK: blend
|
||||
; CHECK: ret
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: test
|
||||
; CHECK: vmovdqu32
|
||||
; CHECK: vpxord
|
||||
; CHECK: ret
|
||||
define <16 x i32> @test() {
|
||||
entry:
|
||||
|
@ -130,4 +130,47 @@ define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) {
|
||||
; CHECK-NOT: psraw
|
||||
; CHECK: ret
|
||||
|
||||
; Fold (vselect (build_vector AllOnes), N1, N2) -> N1
|
||||
|
||||
define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
|
||||
%1 = select <4 x i1> <i1 true, i1 undef, i1 true, i1 undef>, <4 x float> %a, <4 x float> %b
|
||||
ret <4 x float> %1
|
||||
}
|
||||
; CHECK-LABEL: test14
|
||||
; CHECK-NOT: psllw
|
||||
; CHECK-NOT: psraw
|
||||
; CHECK-NOT: pcmpeq
|
||||
; CHECK: ret
|
||||
|
||||
define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
|
||||
%1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
; CHECK-LABEL: test15
|
||||
; CHECK-NOT: psllw
|
||||
; CHECK-NOT: psraw
|
||||
; CHECK-NOT: pcmpeq
|
||||
; CHECK: ret
|
||||
|
||||
; Fold (vselect (build_vector AllZeros), N1, N2) -> N2
|
||||
|
||||
define <4 x float> @test16(<4 x float> %a, <4 x float> %b) {
|
||||
%1 = select <4 x i1> <i1 false, i1 undef, i1 false, i1 undef>, <4 x float> %a, <4 x float> %b
|
||||
ret <4 x float> %1
|
||||
}
|
||||
; CHECK-LABEL: test16
|
||||
; CHECK-NOT: psllw
|
||||
; CHECK-NOT: psraw
|
||||
; CHECK-NOT: xorps
|
||||
; CHECK: ret
|
||||
|
||||
define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) {
|
||||
%1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
; CHECK-LABEL: test17
|
||||
; CHECK-NOT: psllw
|
||||
; CHECK-NOT: psraw
|
||||
; CHECK-NOT: xorps
|
||||
; CHECK: ret
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user