mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-24 13:06:56 +00:00
[X86] Fix a bug in the lowering of BLENDI introduced in r209043.
ISD::VSELECT mask uses 1 to identify the first argument and 0 to identify the second argument. On the other hand, BLENDI uses 0 to identify the first argument and 1 to identify the second argument. Fix the generation of the blend mask to account for this difference.

The bug did not show up with r209043, because we were not checking for the actual arguments of the blend instruction! This commit also fixes the test cases.

Note: The same mask works for the BLENDr variant because the arguments are swapped during instruction selection (see the BLENDXXrr patterns).

<rdar://problem/16975435>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209324 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d1b5bdaebd
commit
fd0096a42c
@ -7980,7 +7980,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// This function assumes its argument is a BUILD_VECTOR of constand or
|
||||
// This function assumes its argument is a BUILD_VECTOR of constants or
|
||||
// undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
|
||||
// true.
|
||||
static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
|
||||
@ -8004,9 +8004,13 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
|
||||
Lane2Cond = !isZero(SndLaneEltCond);
|
||||
|
||||
if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
|
||||
MaskValue |= !!Lane1Cond << i;
|
||||
// Lane1Cond != 0, means we want the first argument.
|
||||
// Lane1Cond == 0, means we want the second argument.
|
||||
// The encoding of this argument is 0 for the first argument, 1
|
||||
// for the second. Therefore, invert the condition.
|
||||
MaskValue |= !Lane1Cond << i;
|
||||
else if (Lane1Cond < 0)
|
||||
MaskValue |= !!Lane2Cond << i;
|
||||
MaskValue |= !Lane2Cond << i;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
@ -3,7 +3,16 @@
|
||||
; AVX128 tests:
|
||||
|
||||
;CHECK-LABEL: vsel_float:
|
||||
;CHECK: vblendps $5
|
||||
; select mask is <i1 true, i1 false, i1 true, i1 false>.
|
||||
; Big endian representation is 0101 = 5.
|
||||
; '1' means takes the first argument, '0' means takes the second argument.
|
||||
; This is the opposite of the intel syntax, thus we expect
|
||||
; the inverted mask: 1010 = 10.
|
||||
; According to the ABI:
|
||||
; v1 is in xmm0 => first argument is xmm0.
|
||||
; v2 is in xmm1 => second argument is xmm1.
|
||||
; result is in xmm0 => destination argument.
|
||||
;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
|
||||
;CHECK: ret
|
||||
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
|
||||
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
|
||||
@ -12,7 +21,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
|
||||
|
||||
|
||||
;CHECK-LABEL: vsel_i32:
|
||||
;CHECK: vblendps $5
|
||||
;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
|
||||
;CHECK: ret
|
||||
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
|
||||
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
|
||||
@ -52,7 +61,13 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
|
||||
|
||||
;CHECK-LABEL: vsel_float8:
|
||||
;CHECK-NOT: vinsertf128
|
||||
;CHECK: vblendps $17
|
||||
; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
|
||||
; which translates into the boolean mask (big endian representation):
|
||||
; 00010001 = 17.
|
||||
; '1' means takes the first argument, '0' means takes the second argument.
|
||||
; This is the opposite of the intel syntax, thus we expect
|
||||
; the inverted mask: 11101110 = 238.
|
||||
;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
|
||||
;CHECK: ret
|
||||
define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
|
||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
|
||||
@ -61,7 +76,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
|
||||
|
||||
;CHECK-LABEL: vsel_i328:
|
||||
;CHECK-NOT: vinsertf128
|
||||
;CHECK: vblendps $17
|
||||
;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
|
||||
;CHECK-NEXT: ret
|
||||
define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
|
||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
|
||||
@ -69,8 +84,15 @@ define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
|
||||
}
|
||||
|
||||
;CHECK-LABEL: vsel_double8:
|
||||
;CHECK: vblendpd $1
|
||||
;CHECK: vblendpd $1
|
||||
; select mask is 2x: 0001 => intel mask: ~0001 = 14
|
||||
; ABI:
|
||||
; v1 is in ymm0 and ymm1.
|
||||
; v2 is in ymm2 and ymm3.
|
||||
; result is in ymm0 and ymm1.
|
||||
; Compute the low part: res.low = blend v1.low, v2.low, blendmask
|
||||
;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0
|
||||
; Compute the high part.
|
||||
;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1
|
||||
;CHECK: ret
|
||||
define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
|
||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
|
||||
@ -78,8 +100,8 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
|
||||
}
|
||||
|
||||
;CHECK-LABEL: vsel_i648:
|
||||
;CHECK: vblendpd $1
|
||||
;CHECK: vblendpd $1
|
||||
;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0
|
||||
;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1
|
||||
;CHECK: ret
|
||||
define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
|
||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
|
||||
|
@ -22,7 +22,17 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
|
||||
}
|
||||
|
||||
;CHECK-LABEL: vsel_8xi16:
|
||||
;CHECK: pblendw $17
|
||||
; The select mask is
|
||||
; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
|
||||
; which translates into the boolean mask (big endian representation):
|
||||
; 00010001 = 17.
|
||||
; '1' means takes the first argument, '0' means takes the second argument.
|
||||
; This is the opposite of the intel syntax, thus we expect
|
||||
; the inverted mask: 11101110 = 238.
|
||||
; According to the ABI:
|
||||
; v1 is in xmm0 => first argument is xmm0.
|
||||
; v2 is in xmm1 => second argument is xmm1.
|
||||
;CHECK: pblendw $238, %xmm1, %xmm0
|
||||
;CHECK: ret
|
||||
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
|
||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
|
||||
|
Loading…
Reference in New Issue
Block a user