[X86][XOP] Add support for the matching of the VPCMOV bit select instruction

XOP provides the VPCMOV instruction, which performs the common vector bit-select operation: OR( AND( SRC1, SRC3 ), AND( SRC2, ~SRC3 ) )

This patch adds tablegen pattern matching for this instruction.

Differential Revision: http://reviews.llvm.org/D8841

llvm-svn: 251975
This commit is contained in:
Simon Pilgrim 2015-11-03 20:27:01 +00:00
parent 2aae8dc2fb
commit ac4c196247
4 changed files with 185 additions and 3 deletions

View File

@ -200,6 +200,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "x86.avx2.pblendd.128" ||
Name == "x86.avx2.pblendd.256" ||
Name == "x86.avx2.vbroadcasti128" ||
Name == "x86.xop.vpcmov" ||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
NewFn = nullptr;
return true;
@ -457,6 +458,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep =
Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
Builder.getInt8(Imm)});
} else if (Name == "llvm.x86.xop.vpcmov") {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Value *Sel = CI->getArgOperand(2);
unsigned NumElts = CI->getType()->getVectorNumElements();
Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
Value *NotSel = Builder.CreateXor(Sel, MinusOne);
Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
Rep = Builder.CreateOr(Sel0, Sel1);
} else if (Name == "llvm.x86.sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_sse42_crc32_32_8);

View File

@ -281,6 +281,16 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
let ExeDomain = SSEPackedInt in
defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
let Predicates = [HasXOP] in {
def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1),
(X86andnp VR128:$src3, VR128:$src2))),
(VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1),
(X86andnp VR256:$src3, VR256:$src2))),
(VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
}
multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),

View File

@ -61,15 +61,14 @@ define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float>
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
; The llvm.x86.xop.vpcmov intrinsic is auto-upgraded to plain IR
; (and/xor/and/or), which must then reselect back to a single VPCMOV.
; NOTE(review): this diff view shows both the old bare "vpcmov" check and
; the new full-operand check; only the stricter one survives in the file.
define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK: vpcmov
; CHECK: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
%res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
; 256-bit variant: vpcmov.256 intrinsic, checked against the ymm encoding.
; NOTE(review): the diff shows the old loose checks ("vpcmov" + "ymm")
; alongside the new exact-operand check; only the latter remains.
define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK: vpcmov
; CHECK: ymm
; CHECK: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
%res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
ret <4 x i64> %res
}

View File

@ -0,0 +1,162 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s
; Bit-select of two <4 x double> done in the integer domain:
; (m & a) | (~m & b) — each set mask bit takes %a, each clear bit takes %b.
; Must fold to a single 256-bit vpcmov despite the float<->i64 bitcasts.
define <4 x double> @pcmov_4f64(<4 x double> %a, <4 x double> %b, <4 x double> %m) {
; CHECK-LABEL: pcmov_4f64:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = bitcast <4 x double> %m to <4 x i64>
%2 = bitcast <4 x double> %a to <4 x i64>
%3 = and <4 x i64> %1, %2
%4 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1>
%5 = bitcast <4 x double> %b to <4 x i64>
%6 = and <4 x i64> %4, %5
%7 = or <4 x i64> %3, %6
%8 = bitcast <4 x i64> %7 to <4 x double>
ret <4 x double> %8
}
; 128-bit double bit-select: (m & a) | (~m & b) via i64 bitcasts.
; Must fold to a single xmm vpcmov.
define <2 x double> @pcmov_2f64(<2 x double> %a, <2 x double> %b, <2 x double> %m) {
; CHECK-LABEL: pcmov_2f64:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = bitcast <2 x double> %m to <2 x i64>
%2 = bitcast <2 x double> %a to <2 x i64>
%3 = and <2 x i64> %1, %2
%4 = xor <2 x i64> %1, <i64 -1, i64 -1>
%5 = bitcast <2 x double> %b to <2 x i64>
%6 = and <2 x i64> %4, %5
%7 = or <2 x i64> %3, %6
%8 = bitcast <2 x i64> %7 to <2 x double>
ret <2 x double> %8
}
; 256-bit float bit-select through i32 lanes: (m & a) | (~m & b).
; Exercises the v8i32 path; still one ymm vpcmov.
define <8 x float> @pcmov_8f32(<8 x float> %a, <8 x float> %b, <8 x float> %m) {
; CHECK-LABEL: pcmov_8f32:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = bitcast <8 x float> %m to <8 x i32>
%2 = bitcast <8 x float> %a to <8 x i32>
%3 = and <8 x i32> %1, %2
%4 = xor <8 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
%5 = bitcast <8 x float> %b to <8 x i32>
%6 = and <8 x i32> %4, %5
%7 = or <8 x i32> %3, %6
%8 = bitcast <8 x i32> %7 to <8 x float>
ret <8 x float> %8
}
; 128-bit float bit-select through i32 lanes: (m & a) | (~m & b).
; Must fold to a single xmm vpcmov.
define <4 x float> @pcmov_4f32(<4 x float> %a, <4 x float> %b, <4 x float> %m) {
; CHECK-LABEL: pcmov_4f32:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = bitcast <4 x float> %m to <4 x i32>
%2 = bitcast <4 x float> %a to <4 x i32>
%3 = and <4 x i32> %1, %2
%4 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
%5 = bitcast <4 x float> %b to <4 x i32>
%6 = and <4 x i32> %4, %5
%7 = or <4 x i32> %3, %6
%8 = bitcast <4 x i32> %7 to <4 x float>
ret <4 x float> %8
}
; Pure-integer v4i64 bit-select: (a & m) | (b & ~m) — the canonical form
; the new TableGen pattern matches directly.
define <4 x i64> @pcmov_4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %m) {
; CHECK-LABEL: pcmov_4i64:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = and <4 x i64> %a, %m
%2 = xor <4 x i64> %m, <i64 -1, i64 -1, i64 -1, i64 -1>
%3 = and <4 x i64> %b, %2
%4 = or <4 x i64> %1, %3
ret <4 x i64> %4
}
; Pure-integer v2i64 bit-select: (a & m) | (b & ~m) -> one xmm vpcmov.
define <2 x i64> @pcmov_2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %m) {
; CHECK-LABEL: pcmov_2i64:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = and <2 x i64> %a, %m
%2 = xor <2 x i64> %m, <i64 -1, i64 -1>
%3 = and <2 x i64> %b, %2
%4 = or <2 x i64> %1, %3
ret <2 x i64> %4
}
; v8i32 bit-select: narrower element type, same (a & m) | (b & ~m) idiom;
; should still select the single ymm vpcmov (element width is irrelevant
; to a pure bitwise select).
define <8 x i32> @pcmov_8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %m) {
; CHECK-LABEL: pcmov_8i32:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = and <8 x i32> %a, %m
%2 = xor <8 x i32> %m, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
%3 = and <8 x i32> %b, %2
%4 = or <8 x i32> %1, %3
ret <8 x i32> %4
}
; v4i32 bit-select: (a & m) | (b & ~m) -> one xmm vpcmov.
define <4 x i32> @pcmov_4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %m) {
; CHECK-LABEL: pcmov_4i32:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = and <4 x i32> %a, %m
%2 = xor <4 x i32> %m, <i32 -1, i32 -1, i32 -1, i32 -1>
%3 = and <4 x i32> %b, %2
%4 = or <4 x i32> %1, %3
ret <4 x i32> %4
}
; v16i16 bit-select: (a & m) | (b & ~m) -> one ymm vpcmov.
define <16 x i16> @pcmov_16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %m) {
; CHECK-LABEL: pcmov_16i16:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = and <16 x i16> %a, %m
%2 = xor <16 x i16> %m, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%3 = and <16 x i16> %b, %2
%4 = or <16 x i16> %1, %3
ret <16 x i16> %4
}
; v8i16 bit-select: (a & m) | (b & ~m) -> one xmm vpcmov.
define <8 x i16> @pcmov_8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %m) {
; CHECK-LABEL: pcmov_8i16:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = and <8 x i16> %a, %m
%2 = xor <8 x i16> %m, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%3 = and <8 x i16> %b, %2
%4 = or <8 x i16> %1, %3
ret <8 x i16> %4
}
; v32i8 bit-select: smallest element width, same bitwise-select idiom;
; (a & m) | (b & ~m) -> one ymm vpcmov.
define <32 x i8> @pcmov_32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %m) {
; CHECK-LABEL: pcmov_32i8:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = and <32 x i8> %a, %m
%2 = xor <32 x i8> %m, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%3 = and <32 x i8> %b, %2
%4 = or <32 x i8> %1, %3
ret <32 x i8> %4
}
; v16i8 bit-select: (a & m) | (b & ~m) -> one xmm vpcmov.
define <16 x i8> @pcmov_16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %m) {
; CHECK-LABEL: pcmov_16i8:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = and <16 x i8> %a, %m
%2 = xor <16 x i8> %m, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%3 = and <16 x i8> %b, %2
%4 = or <16 x i8> %1, %3
ret <16 x i8> %4
}