[X86][XOP] Add support for the matching of the VPCMOV bit select instruction

XOP has the VPCMOV instruction, which performs the common vector bit select operation OR( AND( SRC1, SRC3 ), AND( SRC2, ~SRC3 ) ).

This patch adds tablegen pattern matching for this instruction.
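For illustration, this is the shape of generic IR that the new pattern now lowers to a single VPCMOV (a minimal hand-written sketch with illustrative names, not taken from the patch; the test file added below exercises the same shape across more types):

define <2 x i64> @bitselect_sketch(<2 x i64> %a, <2 x i64> %b, <2 x i64> %m) {
  ; (%a & %m) | (%b & ~%m) selects bits from %a where %m is set, from %b elsewhere
  %t0 = and <2 x i64> %a, %m
  %notm = xor <2 x i64> %m, <i64 -1, i64 -1>
  %t1 = and <2 x i64> %b, %notm
  %r = or <2 x i64> %t0, %t1
  ret <2 x i64> %r
}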

Differential Revision: http://reviews.llvm.org/D8841

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251975 91177308-0d34-0410-b5e6-96231b3b80d8
Simon Pilgrim 2015-11-03 20:27:01 +00:00
parent 7c5ec54082
commit 91c642526e
4 changed files with 185 additions and 3 deletions


@@ -200,6 +200,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "x86.avx2.pblendd.128" ||
Name == "x86.avx2.pblendd.256" ||
Name == "x86.avx2.vbroadcasti128" ||
Name == "x86.xop.vpcmov" ||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
NewFn = nullptr;
return true;
@@ -457,6 +458,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep =
Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
Builder.getInt8(Imm)});
} else if (Name == "llvm.x86.xop.vpcmov") {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Value *Sel = CI->getArgOperand(2);
unsigned NumElts = CI->getType()->getVectorNumElements();
Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
Value *NotSel = Builder.CreateXor(Sel, MinusOne);
Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
Rep = Builder.CreateOr(Sel0, Sel1);
} else if (Name == "llvm.x86.sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_sse42_crc32_32_8);
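
For reference, a rough sketch of the equivalent generic IR this upgrade path emits for a legacy <2 x i64> vpcmov call (the value names here are illustrative; the IRBuilder assigns its own):

; before upgrade:
;   %r = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %sel)
; after upgrade:
%notsel = xor <2 x i64> %sel, <i64 -1, i64 -1>
%sel0 = and <2 x i64> %a0, %sel
%sel1 = and <2 x i64> %a1, %notsel
%r = or <2 x i64> %sel0, %sel1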


@@ -281,6 +281,16 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
let ExeDomain = SSEPackedInt in
defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
let Predicates = [HasXOP] in {
def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1),
(X86andnp VR128:$src3, VR128:$src2))),
(VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1),
(X86andnp VR256:$src3, VR256:$src2))),
(VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
}
multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),


@@ -61,15 +61,14 @@ define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float>
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK: vpcmov
; CHECK: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
%res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK: vpcmov
; CHECK: ymm
; CHECK: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
%res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
ret <4 x i64> %res
}


@@ -0,0 +1,162 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s
define <4 x double> @pcmov_4f64(<4 x double> %a, <4 x double> %b, <4 x double> %m) {
; CHECK-LABEL: pcmov_4f64:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = bitcast <4 x double> %m to <4 x i64>
%2 = bitcast <4 x double> %a to <4 x i64>
%3 = and <4 x i64> %1, %2
%4 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1>
%5 = bitcast <4 x double> %b to <4 x i64>
%6 = and <4 x i64> %4, %5
%7 = or <4 x i64> %3, %6
%8 = bitcast <4 x i64> %7 to <4 x double>
ret <4 x double> %8
}
define <2 x double> @pcmov_2f64(<2 x double> %a, <2 x double> %b, <2 x double> %m) {
; CHECK-LABEL: pcmov_2f64:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = bitcast <2 x double> %m to <2 x i64>
%2 = bitcast <2 x double> %a to <2 x i64>
%3 = and <2 x i64> %1, %2
%4 = xor <2 x i64> %1, <i64 -1, i64 -1>
%5 = bitcast <2 x double> %b to <2 x i64>
%6 = and <2 x i64> %4, %5
%7 = or <2 x i64> %3, %6
%8 = bitcast <2 x i64> %7 to <2 x double>
ret <2 x double> %8
}
define <8 x float> @pcmov_8f32(<8 x float> %a, <8 x float> %b, <8 x float> %m) {
; CHECK-LABEL: pcmov_8f32:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = bitcast <8 x float> %m to <8 x i32>
%2 = bitcast <8 x float> %a to <8 x i32>
%3 = and <8 x i32> %1, %2
%4 = xor <8 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
%5 = bitcast <8 x float> %b to <8 x i32>
%6 = and <8 x i32> %4, %5
%7 = or <8 x i32> %3, %6
%8 = bitcast <8 x i32> %7 to <8 x float>
ret <8 x float> %8
}
define <4 x float> @pcmov_4f32(<4 x float> %a, <4 x float> %b, <4 x float> %m) {
; CHECK-LABEL: pcmov_4f32:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = bitcast <4 x float> %m to <4 x i32>
%2 = bitcast <4 x float> %a to <4 x i32>
%3 = and <4 x i32> %1, %2
%4 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
%5 = bitcast <4 x float> %b to <4 x i32>
%6 = and <4 x i32> %4, %5
%7 = or <4 x i32> %3, %6
%8 = bitcast <4 x i32> %7 to <4 x float>
ret <4 x float> %8
}
define <4 x i64> @pcmov_4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %m) {
; CHECK-LABEL: pcmov_4i64:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = and <4 x i64> %a, %m
%2 = xor <4 x i64> %m, <i64 -1, i64 -1, i64 -1, i64 -1>
%3 = and <4 x i64> %b, %2
%4 = or <4 x i64> %1, %3
ret <4 x i64> %4
}
define <2 x i64> @pcmov_2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %m) {
; CHECK-LABEL: pcmov_2i64:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = and <2 x i64> %a, %m
%2 = xor <2 x i64> %m, <i64 -1, i64 -1>
%3 = and <2 x i64> %b, %2
%4 = or <2 x i64> %1, %3
ret <2 x i64> %4
}
define <8 x i32> @pcmov_8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %m) {
; CHECK-LABEL: pcmov_8i32:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = and <8 x i32> %a, %m
%2 = xor <8 x i32> %m, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
%3 = and <8 x i32> %b, %2
%4 = or <8 x i32> %1, %3
ret <8 x i32> %4
}
define <4 x i32> @pcmov_4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %m) {
; CHECK-LABEL: pcmov_4i32:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = and <4 x i32> %a, %m
%2 = xor <4 x i32> %m, <i32 -1, i32 -1, i32 -1, i32 -1>
%3 = and <4 x i32> %b, %2
%4 = or <4 x i32> %1, %3
ret <4 x i32> %4
}
define <16 x i16> @pcmov_16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %m) {
; CHECK-LABEL: pcmov_16i16:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = and <16 x i16> %a, %m
%2 = xor <16 x i16> %m, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%3 = and <16 x i16> %b, %2
%4 = or <16 x i16> %1, %3
ret <16 x i16> %4
}
define <8 x i16> @pcmov_8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %m) {
; CHECK-LABEL: pcmov_8i16:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = and <8 x i16> %a, %m
%2 = xor <8 x i16> %m, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%3 = and <8 x i16> %b, %2
%4 = or <8 x i16> %1, %3
ret <8 x i16> %4
}
define <32 x i8> @pcmov_32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %m) {
; CHECK-LABEL: pcmov_32i8:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = and <32 x i8> %a, %m
%2 = xor <32 x i8> %m, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%3 = and <32 x i8> %b, %2
%4 = or <32 x i8> %1, %3
ret <32 x i8> %4
}
define <16 x i8> @pcmov_16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %m) {
; CHECK-LABEL: pcmov_16i8:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = and <16 x i8> %a, %m
%2 = xor <16 x i8> %m, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%3 = and <16 x i8> %b, %2
%4 = or <16 x i8> %1, %3
ret <16 x i8> %4
}