[X86] Remove and autoupgrade kand/kandn/kor/kxor/kxnor/knot intrinsics.

Clang already stopped using these a couple months ago.

The test cases aren't great: nothing forces the operations to stay in k-registers, so some of them moved back to scalar ops because the bitcasts were moved around.

llvm-svn: 324177
This commit is contained in:
Craig Topper 2018-02-03 20:18:25 +00:00
parent 9688ed61fe
commit 071ad9c6e0
7 changed files with 147 additions and 158 deletions

View File

@ -3727,23 +3727,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Mask instructions
// 16-bit mask
def int_x86_avx512_kand_w : // TODO: remove this intrinsic
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_kandn_w : // TODO: remove this intrinsic
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_knot_w : // TODO: remove this intrinsic
Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_kor_w : // TODO: remove this intrinsic
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_kxor_w : // TODO: remove this intrinsic
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_kxnor_w : // TODO: remove this intrinsic
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">,
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;

View File

@ -109,6 +109,12 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name == "sse2.pminu.b" || // Added in 3.9
Name == "sse41.pminuw" || // Added in 3.9
Name == "sse41.pminud" || // Added in 3.9
Name == "avx512.kand.w" || // Added in 7.0
Name == "avx512.kandn.w" || // Added in 7.0
Name == "avx512.knot.w" || // Added in 7.0
Name == "avx512.kor.w" || // Added in 7.0
Name == "avx512.kxor.w" || // Added in 7.0
Name == "avx512.kxnor.w" || // Added in 7.0
Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
Name.startswith("avx2.pmax") || // Added in 3.9
Name.startswith("avx2.pmin") || // Added in 3.9
@ -1119,6 +1125,37 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(LHS, RHS,
makeArrayRef(Indices, NumElts));
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kand.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
Rep = Builder.CreateAnd(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kandn.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
LHS = Builder.CreateNot(LHS);
Rep = Builder.CreateAnd(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kor.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
Rep = Builder.CreateOr(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kxor.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
Rep = Builder.CreateXor(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kxnor.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
LHS = Builder.CreateNot(LHS);
Rep = Builder.CreateXor(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.knot.w") {
Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Rep = Builder.CreateNot(Rep);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),

View File

@ -20563,33 +20563,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case Intrinsic::x86_avx512_knot_w: {
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getConstant(1, dl, MVT::v16i1);
SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS);
return DAG.getBitcast(MVT::i16, Res);
}
case Intrinsic::x86_avx512_kandn_w: {
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
// Invert LHS for the not.
LHS = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS,
DAG.getConstant(1, dl, MVT::v16i1));
SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
SDValue Res = DAG.getNode(ISD::AND, dl, MVT::v16i1, LHS, RHS);
return DAG.getBitcast(MVT::i16, Res);
}
case Intrinsic::x86_avx512_kxnor_w: {
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS);
// Invert result for the not.
Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, Res,
DAG.getConstant(1, dl, MVT::v16i1));
return DAG.getBitcast(MVT::i16, Res);
}
case Intrinsic::x86_sse42_pcmpistria128:
case Intrinsic::x86_sse42_pcmpestria128:
case Intrinsic::x86_sse42_pcmpistric128:

View File

@ -465,9 +465,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_kor_w, MASK_BINOP, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_kxor_w, MASK_BINOP, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD,

View File

@ -12,13 +12,16 @@ define zeroext i16 @cmp_kor_seq_16(<16 x float> %a, <16 x float> %b, <16 x float
; CHECK-LABEL: cmp_kor_seq_16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vcmpgeps %zmm4, %zmm0, %k0
; CHECK-NEXT: vcmpgeps %zmm4, %zmm1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: vcmpgeps %zmm4, %zmm2, %k1
; CHECK-NEXT: vcmpgeps %zmm4, %zmm3, %k2
; CHECK-NEXT: korw %k2, %k1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %ecx
; CHECK-NEXT: vcmpgeps %zmm4, %zmm1, %k0
; CHECK-NEXT: kmovw %k0, %edx
; CHECK-NEXT: vcmpgeps %zmm4, %zmm2, %k0
; CHECK-NEXT: kmovw %k0, %esi
; CHECK-NEXT: vcmpgeps %zmm4, %zmm3, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
entry:

View File

@ -3757,3 +3757,104 @@ define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2
ret i8 %res2
}
; Exercises llvm.x86.avx512.kand.w with two chained calls: %a0 is combined
; with the constant 8, and that result is then combined with %a1.
; The expected output below performs one AND as a scalar andl and the other
; as kandw on mask registers.
declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
define i16 @test_kand(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kand:
; CHECK: ## %bb.0:
; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: movw $8, %ax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
ret i16 %t2
}
; Exercises llvm.x86.avx512.kandn.w with two chained calls: %a0 with the
; constant 8, then that result with %a1.
; The expected output below keeps both operations as kandnw on mask registers.
declare i16 @llvm.x86.avx512.kandn.w(i16, i16) nounwind readnone
define i16 @test_kandn(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kandn:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: movw $8, %ax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: kandnw %k1, %k0, %k0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: kandnw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1)
ret i16 %t2
}
; Exercises llvm.x86.avx512.knot.w with a single call on %a0.
; The expected output below moves the argument into a mask register, applies
; knotw, and moves the result back to a general-purpose register.
declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
define i16 @test_knot(i16 %a0) {
; CHECK-LABEL: test_knot:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
ret i16 %res
}
; Exercises llvm.x86.avx512.kor.w with two chained calls: %a0 with the
; constant 8, then that result with %a1.
; The expected output below performs one OR as a scalar orl and the other as
; korw on mask registers.
declare i16 @llvm.x86.avx512.kor.w(i16, i16) nounwind readnone
define i16 @test_kor(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kor:
; CHECK: ## %bb.0:
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: movw $8, %ax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1)
ret i16 %t2
}
; Exercises llvm.x86.avx512.kxnor.w with two chained calls: %a0 with the
; constant 8, then that result with %a1.
declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone
; TODO: the two kxnor instructions here are a no-op and should be eliminated,
; probably by FoldConstantArithmetic in SelectionDAG.
; NOTE(review): the expected output below contains only xorl and kxorw (no
; kxnor or knot instructions), so this TODO may be stale -- confirm.
define i16 @test_kxnor(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kxnor:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorl %esi, %edi
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: movw $8, %ax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: kxorw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1)
ret i16 %t2
}
; Exercises llvm.x86.avx512.kxor.w with two chained calls: %a0 with the
; constant 8, then that result with %a1.
; The expected output below performs one XOR as a scalar xorl and the other
; as kxorw on mask registers.
declare i16 @llvm.x86.avx512.kxor.w(i16, i16) nounwind readnone
define i16 @test_kxor(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kxor:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorl %esi, %edi
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: movw $8, %ax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: kxorw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1)
ret i16 %t2
}

View File

@ -29,111 +29,6 @@ define i32 @test_kortestc(i16 %a0, i16 %a1) {
ret i32 %res
}
declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
define i16 @test_kand(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kand:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: movw $8, %ax
; CHECK-NEXT: kmovw %eax, %k2
; CHECK-NEXT: kandw %k0, %k1, %k0
; CHECK-NEXT: kandw %k0, %k2, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
ret i16 %t2
}
declare i16 @llvm.x86.avx512.kandn.w(i16, i16) nounwind readnone
define i16 @test_kandn(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kandn:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: movw $8, %ax
; CHECK-NEXT: kmovw %eax, %k2
; CHECK-NEXT: kandnw %k2, %k1, %k1
; CHECK-NEXT: kandnw %k0, %k1, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1)
ret i16 %t2
}
declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
define i16 @test_knot(i16 %a0) {
; CHECK-LABEL: test_knot:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
ret i16 %res
}
declare i16 @llvm.x86.avx512.kor.w(i16, i16) nounwind readnone
define i16 @test_kor(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kor:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: movw $8, %ax
; CHECK-NEXT: kmovw %eax, %k2
; CHECK-NEXT: korw %k0, %k1, %k0
; CHECK-NEXT: korw %k0, %k2, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1)
ret i16 %t2
}
declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone
; TODO: the two kxnor instructions here are a no-op and should be eliminated,
; probably by FoldConstantArithmetic in SelectionDAG.
define i16 @test_kxnor(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kxnor:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: movw $8, %ax
; CHECK-NEXT: kmovw %eax, %k2
; CHECK-NEXT: kxorw %k0, %k1, %k0
; CHECK-NEXT: kxorw %k0, %k2, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1)
ret i16 %t2
}
declare i16 @llvm.x86.avx512.kxor.w(i16, i16) nounwind readnone
define i16 @test_kxor(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kxor:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: movw $8, %ax
; CHECK-NEXT: kmovw %eax, %k2
; CHECK-NEXT: kxorw %k0, %k1, %k0
; CHECK-NEXT: kxorw %k0, %k2, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1)
ret i16 %t2
}
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rcp_ps_512:
; CHECK: ## %bb.0: